http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapperTest.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapperTest.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapperTest.java new file mode 100644 index 0000000..51b29cb --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapperTest.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.transform; + +import java.io.IOException; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mrunit.mapreduce.MapDriver; +import org.apache.hadoop.mrunit.types.Pair; +import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapperTests; +import org.apache.jena.hadoop.rdf.mapreduce.transform.QuadsToTriplesMapper; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.junit.Test; + +import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; +import com.hp.hpl.jena.graph.NodeFactory; +import com.hp.hpl.jena.graph.Triple; +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * Tests for the {@link QuadsToTriplesMapper} + * + * + * + */ +public class QuadsToTriplesMapperTest extends AbstractMapperTests<LongWritable, QuadWritable, LongWritable, TripleWritable> { + + @Override + protected Mapper<LongWritable, QuadWritable, LongWritable, TripleWritable> getInstance() { + return new QuadsToTriplesMapper<LongWritable>(); + } + + protected void generateData(MapDriver<LongWritable, QuadWritable, LongWritable, TripleWritable> driver, int num) { + for (int i = 0; i < num; i++) { + Triple t = new Triple(NodeFactory.createURI("http://subjects/" + i), NodeFactory.createURI("http://predicate"), + NodeFactory.createLiteral(Integer.toString(i), XSDDatatype.XSDinteger)); + Quad q = new Quad(Quad.defaultGraphNodeGenerated, t); + driver.addInput(new LongWritable(i), new QuadWritable(q)); + driver.addOutput(new LongWritable(i), new TripleWritable(t)); + } + } + + /** + * Tests quads to triples conversion + * + * @throws IOException + */ + @Test + public void quads_to_triples_mapper_01() throws IOException { + MapDriver<LongWritable, QuadWritable, LongWritable, TripleWritable> driver = this.getMapDriver(); + + Triple t = new Triple(NodeFactory.createURI("http://s"), NodeFactory.createURI("http://p"), + NodeFactory.createLiteral("test")); + Quad q = 
new Quad(Quad.defaultGraphNodeGenerated, t); + driver.withInput(new Pair<LongWritable, QuadWritable>(new LongWritable(1), new QuadWritable(q))).withOutput( + new Pair<LongWritable, TripleWritable>(new LongWritable(1), new TripleWritable(t))); + driver.runTest(); + } + + /** + * Tests quads to triples conversion + * + * @throws IOException + */ + @Test + public void quads_to_triples_mapper_02() throws IOException { + MapDriver<LongWritable, QuadWritable, LongWritable, TripleWritable> driver = this.getMapDriver(); + this.generateData(driver, 100); + driver.runTest(); + } + + /** + * Tests quads to triples conversion + * + * @throws IOException + */ + @Test + public void quads_to_triples_mapper_03() throws IOException { + MapDriver<LongWritable, QuadWritable, LongWritable, TripleWritable> driver = this.getMapDriver(); + this.generateData(driver, 1000); + driver.runTest(); + } + + /** + * Tests quads to triples conversion + * + * @throws IOException + */ + @Test + public void quads_to_triples_mapper_04() throws IOException { + MapDriver<LongWritable, QuadWritable, LongWritable, TripleWritable> driver = this.getMapDriver(); + this.generateData(driver, 10000); + driver.runTest(); + } +}
http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapperTest.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapperTest.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapperTest.java new file mode 100644 index 0000000..bdf39f5 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapperTest.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.transform; + +import java.io.IOException; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mrunit.mapreduce.MapDriver; +import org.apache.hadoop.mrunit.types.Pair; +import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapperTests; +import org.apache.jena.hadoop.rdf.mapreduce.transform.TriplesToQuadsBySubjectMapper; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.junit.Test; + +import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; +import com.hp.hpl.jena.graph.NodeFactory; +import com.hp.hpl.jena.graph.Triple; +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * Tests for the {@link TriplesToQuadsBySubjectMapper} + * + * + * + */ +public class TriplesToQuadsBySubjectMapperTest extends AbstractMapperTests<LongWritable, TripleWritable, LongWritable, QuadWritable> { + + @Override + protected Mapper<LongWritable, TripleWritable, LongWritable, QuadWritable> getInstance() { + return new TriplesToQuadsBySubjectMapper<LongWritable>(); + } + + protected void generateData(MapDriver<LongWritable, TripleWritable, LongWritable, QuadWritable> driver, int num) { + for (int i = 0; i < num; i++) { + Triple t = new Triple(NodeFactory.createURI("http://subjects/" + i), NodeFactory.createURI("http://predicate"), + NodeFactory.createLiteral(Integer.toString(i), XSDDatatype.XSDinteger)); + Quad q = new Quad(t.getSubject(), t); + driver.addInput(new LongWritable(i), new TripleWritable(t)); + driver.addOutput(new LongWritable(i), new QuadWritable(q)); + } + } + + /** + * Tests triples to quads conversion + * + * @throws IOException + */ + @Test + public void triples_to_quads_mapper_01() throws IOException { + MapDriver<LongWritable, TripleWritable, LongWritable, QuadWritable> driver = this.getMapDriver(); + + Triple t = new Triple(NodeFactory.createURI("http://s"), 
NodeFactory.createURI("http://p"), + NodeFactory.createLiteral("test")); + Quad q = new Quad(t.getSubject(), t); + driver.withInput(new Pair<LongWritable, TripleWritable>(new LongWritable(1), new TripleWritable(t))).withOutput( + new Pair<LongWritable, QuadWritable>(new LongWritable(1), new QuadWritable(q))); + driver.runTest(); + } + + /** + * Tests triples to quads conversion + * + * @throws IOException + */ + @Test + public void triples_to_quads_mapper_02() throws IOException { + MapDriver<LongWritable, TripleWritable, LongWritable, QuadWritable> driver = this.getMapDriver(); + this.generateData(driver, 100); + driver.runTest(); + } + + /** + * Tests triples to quads conversion + * + * @throws IOException + */ + @Test + public void triples_to_quads_mapper_03() throws IOException { + MapDriver<LongWritable, TripleWritable, LongWritable, QuadWritable> driver = this.getMapDriver(); + this.generateData(driver, 1000); + driver.runTest(); + } + + /** + * Tests triples to quads conversion + * + * @throws IOException + */ + @Test + public void triples_to_quads_mapper_04() throws IOException { + MapDriver<LongWritable, TripleWritable, LongWritable, QuadWritable> driver = this.getMapDriver(); + this.generateData(driver, 10000); + driver.runTest(); + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapperTest.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapperTest.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapperTest.java new file mode 100644 index 0000000..b82f74b --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapperTest.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce.transform; + +import java.io.IOException; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mrunit.mapreduce.MapDriver; +import org.apache.hadoop.mrunit.types.Pair; +import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapperTests; +import org.apache.jena.hadoop.rdf.mapreduce.transform.TriplesToQuadsConstantGraphMapper; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.junit.Test; + +import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; +import com.hp.hpl.jena.graph.NodeFactory; +import com.hp.hpl.jena.graph.Triple; +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * Tests for the {@link TriplesToQuadsConstantGraphMapper} + * + * + * + */ +public class TriplesToQuadsConstantGraphMapperTest extends AbstractMapperTests<LongWritable, TripleWritable, LongWritable, QuadWritable> { + + @Override + protected Mapper<LongWritable, TripleWritable, LongWritable, QuadWritable> getInstance() { + return new TriplesToQuadsConstantGraphMapper<LongWritable>(); + } + + protected void generateData(MapDriver<LongWritable, TripleWritable, LongWritable, QuadWritable> driver, int num) { + for (int i = 0; i < num; i++) { + Triple t = new Triple(NodeFactory.createURI("http://subjects/" + i), NodeFactory.createURI("http://predicate"), + NodeFactory.createLiteral(Integer.toString(i), XSDDatatype.XSDinteger)); + Quad q = new Quad(Quad.defaultGraphNodeGenerated, t); + driver.addInput(new LongWritable(i), new TripleWritable(t)); + driver.addOutput(new LongWritable(i), new QuadWritable(q)); + } + } + + /** + * Tests triples to quads conversion + * + * @throws IOException + */ + @Test + public void triples_to_quads_mapper_01() throws IOException { + MapDriver<LongWritable, TripleWritable, LongWritable, QuadWritable> driver = this.getMapDriver(); + + Triple t = new Triple(NodeFactory.createURI("http://s"), NodeFactory.createURI("http://p"), + NodeFactory.createLiteral("test")); + Quad q = new Quad(Quad.defaultGraphNodeGenerated, t); + driver.withInput(new Pair<LongWritable, TripleWritable>(new LongWritable(1), new TripleWritable(t))).withOutput( + new Pair<LongWritable, QuadWritable>(new LongWritable(1), new QuadWritable(q))); + driver.runTest(); + } + + /** + * Tests triples to quads conversion + * + * @throws IOException + */ + @Test + public void triples_to_quads_mapper_02() throws IOException { + MapDriver<LongWritable, TripleWritable, LongWritable, QuadWritable> driver = this.getMapDriver(); + this.generateData(driver, 100); + driver.runTest(); + } + + /** + * Tests triples to quads conversion + * + * @throws IOException + */ + @Test + public void triples_to_quads_mapper_03() throws IOException { + MapDriver<LongWritable, TripleWritable, LongWritable, QuadWritable> driver = this.getMapDriver(); + this.generateData(driver, 1000); + driver.runTest(); + } + + /** + * Tests triples to quads conversion + * + * @throws IOException + */ + @Test + public void triples_to_quads_mapper_04() throws IOException { + MapDriver<LongWritable, TripleWritable, LongWritable, QuadWritable> driver = this.getMapDriver(); + this.generateData(driver, 10000); + driver.runTest(); + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-stats/hadoop-job.xml ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-stats/hadoop-job.xml b/hadoop-rdf/hadoop-rdf-stats/hadoop-job.xml new 
file mode 100644 index 0000000..de72645 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-stats/hadoop-job.xml @@ -0,0 +1,46 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<assembly> + <id>hadoop-job</id> + <formats> + <format>jar</format> + </formats> + <includeBaseDirectory>false</includeBaseDirectory> + <dependencySets> + <dependencySet> + <unpack>false</unpack> + <scope>runtime</scope> + <outputDirectory>lib</outputDirectory> + <excludes> + <exclude>${groupId}:${artifactId}</exclude> + </excludes> + </dependencySet> + <dependencySet> + <unpack>true</unpack> + <includes> + <include>${groupId}:${artifactId}</include> + </includes> + </dependencySet> + </dependencySets> + <fileSets> + <fileSet> + <directory>${basedir}/target/test-classes</directory> + <outputDirectory>/</outputDirectory> + </fileSet> + </fileSets> +</assembly> http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-stats/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-stats/pom.xml b/hadoop-rdf/hadoop-rdf-stats/pom.xml new file mode 100644 index 0000000..bf69fa6 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-stats/pom.xml @@ -0,0 +1,103 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> + +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.jena</groupId> + <artifactId>jena-hadoop-rdf</artifactId> + <version>0.9.0-SNAPSHOT</version> + </parent> + <artifactId>jena-hadoop-rdf-stats</artifactId> + <name>Apache Jena - RDF Tools for Hadoop - Statistics Demo App</name> + <description>A demo application that can be run on Hadoop to produce a statistical analysis on arbitrary RDF inputs</description> + + <dependencies> + <!-- Internal Project Dependencies --> + <dependency> + <groupId>org.apache.jena</groupId> + <artifactId>jena-hadoop-rdf-io</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.jena</groupId> + <artifactId>jena-hadoop-rdf-mapreduce</artifactId> + <version>${project.version}</version> + </dependency> + + <!-- CLI related Dependencies --> + <dependency> + <groupId>io.airlift</groupId> + <artifactId>airline</artifactId> + <version>0.6</version> + </dependency> + + <!-- Hadoop Dependencies --> + <!-- Note these will be provided on the Hadoop cluster hence the provided + scope --> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-mapreduce-client-common</artifactId> + <scope>provided</scope> + </dependency> + + <!-- Test Dependencies --> + <dependency> + <groupId>org.apache.jena</groupId> + <artifactId>jena-hadoop-rdf-mapreduce</artifactId> + <version>${project.version}</version> + <classifier>tests</classifier> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.mrunit</groupId> + <artifactId>mrunit</artifactId> + <scope>test</scope> + <classifier>hadoop2</classifier> + </dependency> + </dependencies> + + <build> + <plugins> + <!-- Assembly plugin is used to produce the runnable Hadoop JAR with all + dependencies contained therein --> + <plugin> + <artifactId>maven-assembly-plugin</artifactId> + <configuration> + <descriptors> + <descriptor>hadoop-job.xml</descriptor> + </descriptors> + </configuration> + <executions> + <execution> + <id>make-assembly</id> + <phase>package</phase> + <goals> + <goal>single</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> +</project> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java b/hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java new file mode 100644 index 0000000..917176e --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java @@ -0,0 +1,408 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.stats; + +import io.airlift.command.Arguments; +import io.airlift.command.Command; +import io.airlift.command.Help; +import io.airlift.command.HelpOption; +import io.airlift.command.Option; +import io.airlift.command.OptionType; +import io.airlift.command.ParseArgumentsMissingException; +import io.airlift.command.ParseArgumentsUnexpectedException; +import io.airlift.command.ParseException; +import io.airlift.command.ParseOptionMissingException; +import io.airlift.command.ParseOptionMissingValueException; +import io.airlift.command.SingleCommand; +import io.airlift.command.model.CommandMetadata; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; + +import javax.inject.Inject; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.jena.hadoop.rdf.stats.jobs.JobFactory; + + +/** + * Entry point for the Hadoop job, handles launching all the relevant Hadoop + * jobs + * + * + * + */ +@Command(name = "bin/hadoop jar PATH_TO_JAR org.apache.jena.hadoop.rdf.stats.RdfStats", description = "A command which computes statistics on RDF data using Hadoop") +public class RdfStats implements Tool { + + static final String ANSI_RED = "\u001B[31m"; + static final String ANSI_RESET = "\u001B[0m"; + + private static final String DATA_TYPE_TRIPLES = "triples", DATA_TYPE_QUADS = "quads", DATA_TYPE_MIXED = "mixed"; + + /** + * Help option + */ + @Inject + public HelpOption helpOption; + + /** + * Gets/Sets whether all available statistics will be calculated + */ + @Option(name = { "-a", "--all" }, description = "Requests that all available statistics be calculated", type = OptionType.COMMAND) + public boolean all = false; + + /** + * Gets/Sets whether node usage counts will be calculated + */ + @Option(name = { "-n", "--node-count" }, description = "Requests that node usage counts be calculated", type = OptionType.COMMAND) + public boolean nodeCount = false; + + /** + * Gets/Sets whether characteristic sets will be calculated + */ + @Option(name = { "-c", "--characteristic-sets" }, description = "Requests that characteristic sets be calculated", type = OptionType.COMMAND) + public boolean characteristicSets = false; + + /** + * Gets/Sets whether type counts will be calculated + */ + @Option(name = { "-t", "--type-counts" }, description = "Requests that rdf:type usage counts be calculated", type = OptionType.COMMAND) + public boolean typeCount = false; + + /** + * Gets/Sets whether data type counts will be calculated + */ + @Option(name = { "-d", "--data-types" }, description = "Requests that literal data type usage counts be calculated", type = OptionType.COMMAND) + public boolean dataTypeCount = false; + + /** + * Gets/Sets whether namespace counts will be calculated + */ + @Option(name = { "--namespaces" }, description = "Requests that namespace usage counts be calculated", type = OptionType.COMMAND) + public boolean namespaceCount = false; + + /** + * 
Gets/Sets the input data type used + */ + @Option(name = { "--input-type" }, allowedValues = { DATA_TYPE_MIXED, DATA_TYPE_QUADS, DATA_TYPE_TRIPLES }, description = "Specifies whether the input data is a mixture of quads and triples, just quads or just triples. Using the most specific data type will yield the most accurate statistics") + public String inputType = DATA_TYPE_MIXED; + + /** + * Gets/Sets the output path + */ + @Option(name = { "-o", "--output" }, title = "OutputPath", description = "Sets the output path", arity = 1, required = true) + public String outputPath = null; + + /** + * Gets/Sets the input path(s) + */ + @Arguments(description = "Sets the input path(s)", title = "InputPath", required = true) + public List<String> inputPaths = new ArrayList<String>(); + + private Configuration config; + + /** + * Entry point method + * + * @param args + * Arguments + * @throws Exception + */ + public static void main(String[] args) throws Exception { + try { + // Run and exit with result code if no errors bubble up + // Note that the exit code may still be an error code + int res = ToolRunner.run(new Configuration(true), new RdfStats(), args); + System.exit(res); + } catch (Exception e) { + System.err.println(ANSI_RED + e.getMessage()); + e.printStackTrace(System.err); + } finally { + System.err.print(ANSI_RESET); + } + // If any errors bubble up exit with non-zero code + System.exit(1); + } + + private static void showUsage() { + CommandMetadata metadata = SingleCommand.singleCommand(RdfStats.class).getCommandMetadata(); + StringBuilder builder = new StringBuilder(); + Help.help(metadata, builder); + System.err.print(ANSI_RESET); + System.err.println(builder.toString()); + System.exit(1); + } + + @Override + public void setConf(Configuration conf) { + this.config = conf; + } + + @Override + public Configuration getConf() { + return this.config; + } + + @Override + public int run(String[] args) throws Exception { + try { + // Parse custom arguments + RdfStats cmd = SingleCommand.singleCommand(RdfStats.class).parse(args); + + // Copy Hadoop configuration across + cmd.setConf(this.getConf()); + + // Show help if requested and exit with success + if (cmd.helpOption.showHelpIfRequested()) { + return 0; + } + + // Run the command and exit with success + cmd.run(); + return 0; + + } catch (ParseOptionMissingException e) { + System.err.println(ANSI_RED + e.getMessage()); + System.err.println(); + showUsage(); + } catch (ParseOptionMissingValueException e) { + System.err.println(ANSI_RED + e.getMessage()); + System.err.println(); + showUsage(); + } catch (ParseArgumentsMissingException e) { + System.err.println(ANSI_RED + e.getMessage()); + System.err.println(); + showUsage(); + } catch (ParseArgumentsUnexpectedException e) { + System.err.println(ANSI_RED + e.getMessage()); + System.err.println(); + showUsage(); + // TODO Re-enable as and when we upgrade Airline + // } catch (ParseOptionIllegalValueException e) { + // System.err.println(ANSI_RED + e.getMessage()); + // System.err.println(); + // showUsage(); + } catch (ParseException e) { + System.err.println(ANSI_RED + e.getMessage()); + System.err.println(); + showUsage(); + } catch (UnsupportedOperationException e) { + System.err.println(ANSI_RED + e.getMessage()); + } catch (Throwable e) { + System.err.println(ANSI_RED + e.getMessage()); + e.printStackTrace(System.err); + } finally { + System.err.print(ANSI_RESET); + } + return 1; + } + + private void run() throws Throwable { + if (!this.outputPath.endsWith("/")) { + this.outputPath += 
"/"; + } + + // If all statistics requested turn on all statistics + if (this.all) { + this.nodeCount = true; + this.characteristicSets = true; + this.typeCount = true; + this.dataTypeCount = true; + this.namespaceCount = true; + } + + // How many statistics were requested? + int statsRequested = 0; + if (this.nodeCount) + statsRequested++; + if (this.characteristicSets) + statsRequested++; + if (this.typeCount) + statsRequested++; + if (this.dataTypeCount) + statsRequested++; + if (this.namespaceCount) + statsRequested++; + + // Error if no statistics requested + if (statsRequested == 0) { + System.err + .println("You did not request any statistics to be calculated, please use one/more of the relevant options to select the statistics to be computed"); + return; + } + int statsComputed = 1; + + // Compute statistics + if (this.nodeCount) { + Job job = this.selectNodeCountJob(); + statsComputed = this.computeStatistic(job, statsComputed, statsRequested); + } + if (this.typeCount) { + Job[] jobs = this.selectTypeCountJobs(); + statsComputed = this.computeStatistic(jobs, false, false, statsComputed, statsRequested); + } + if (this.dataTypeCount) { + Job job = this.selectDataTypeCountJob(); + statsComputed = this.computeStatistic(job, statsComputed, statsRequested); + } + if (this.namespaceCount) { + Job job = this.selectNamespaceCountJob(); + statsComputed = this.computeStatistic(job, statsComputed, statsRequested); + } + if (this.characteristicSets) { + Job[] jobs = this.selectCharacteristicSetJobs(); + statsComputed = this.computeStatistic(jobs, false, false, statsComputed, statsRequested); + } + } + + private int computeStatistic(Job job, int statsComputed, int statsRequested) throws Throwable { + System.out.println(String.format("Computing Statistic %d of %d requested", statsComputed, statsRequested)); + this.runJob(job); + System.out.println(String.format("Computed Statistic %d of %d requested", statsComputed, statsRequested)); + System.out.println(); + return ++statsComputed; + } + + private int computeStatistic(Job[] jobs, boolean continueOnFailure, boolean continueOnError, int statsComputed, + int statsRequested) { + System.out.println(String.format("Computing Statistic %d of %d requested", statsComputed, statsRequested)); + this.runJobSequence(jobs, continueOnFailure, continueOnError); + System.out.println(String.format("Computed Statistic %d of %d requested", statsComputed, statsRequested)); + System.out.println(); + return ++statsComputed; + } + + private boolean runJob(Job job) throws Throwable { + System.out.println("Submitting Job " + job.getJobName()); + long start = System.nanoTime(); + try { + job.submit(); + if (job.monitorAndPrintJob()) { + System.out.println("Job " + job.getJobName() + " succeeded"); + return true; + } else { + System.out.println("Job " + job.getJobName() + " failed"); + return false; + } + } catch (Throwable e) { + System.out.println("Unexpected failure in Job " + job.getJobName()); + throw e; + } finally { + long end = System.nanoTime(); + System.out.println("Job " + job.getJobName() + " finished after " + + String.format("%,d milliseconds", TimeUnit.NANOSECONDS.toMillis(end - start))); + System.out.println(); + } + } + + private void runJobSequence(Job[] jobs, boolean continueOnFailure, boolean continueOnError) { + for (int i = 0; i < jobs.length; i++) { + Job job = jobs[i]; + try { + boolean success = this.runJob(job); + if (!success && !continueOnFailure) + throw new IllegalStateException("Unable to complete job sequence because Job " + 
job.getJobName() + " failed"); + } catch (IllegalStateException e) { + throw e; + } catch (Throwable e) { + if (!continueOnError) + throw new IllegalStateException("Unable to complete job sequence because job " + job.getJobName() + + " errored", e); + } + } + } + + private Job selectNodeCountJob() throws IOException { + String realOutputPath = outputPath + "node-counts/"; + String[] inputs = new String[this.inputPaths.size()]; + this.inputPaths.toArray(inputs); + + if (DATA_TYPE_QUADS.equals(this.inputType)) { + return JobFactory.getQuadNodeCountJob(this.config, inputs, realOutputPath); + } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) { + return JobFactory.getTripleNodeCountJob(this.config, inputs, realOutputPath); + } else { + return JobFactory.getNodeCountJob(this.config, inputs, realOutputPath); + } + } + + private Job selectDataTypeCountJob() throws IOException { + String realOutputPath = outputPath + "data-type-counts/"; + String[] inputs = new String[this.inputPaths.size()]; + this.inputPaths.toArray(inputs); + + if (DATA_TYPE_QUADS.equals(this.inputType)) { + return JobFactory.getQuadDataTypeCountJob(this.config, inputs, realOutputPath); + } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) { + return JobFactory.getTripleDataTypeCountJob(this.config, inputs, realOutputPath); + } else { + return JobFactory.getDataTypeCountJob(this.config, inputs, realOutputPath); + } + } + + private Job selectNamespaceCountJob() throws IOException { + String realOutputPath = outputPath + "namespace-counts/"; + String[] inputs = new String[this.inputPaths.size()]; + this.inputPaths.toArray(inputs); + + if (DATA_TYPE_QUADS.equals(this.inputType)) { + return JobFactory.getQuadNamespaceCountJob(this.config, inputs, realOutputPath); + } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) { + return JobFactory.getTripleNamespaceCountJob(this.config, inputs, realOutputPath); + } else { + return JobFactory.getNamespaceCountJob(this.config, inputs, realOutputPath); + } + } + + private Job[] selectCharacteristicSetJobs() throws IOException { + String intermediateOutputPath = outputPath + "characteristics/intermediate/"; + String finalOutputPath = outputPath + "characteristics/final/"; + String[] inputs = new String[this.inputPaths.size()]; + this.inputPaths.toArray(inputs); + + if (DATA_TYPE_QUADS.equals(this.inputType)) { + return JobFactory.getQuadCharacteristicSetJobs(this.config, inputs, intermediateOutputPath, finalOutputPath); + } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) { + return JobFactory.getTripleCharacteristicSetJobs(this.config, inputs, intermediateOutputPath, finalOutputPath); + } else { + return JobFactory.getCharacteristicSetJobs(this.config, inputs, intermediateOutputPath, finalOutputPath); + } + } + + private Job[] selectTypeCountJobs() throws IOException { + String intermediateOutputPath = outputPath + "type-declarations/"; + String finalOutputPath = outputPath + "type-counts/"; + String[] inputs = new String[this.inputPaths.size()]; + this.inputPaths.toArray(inputs); + + if (DATA_TYPE_QUADS.equals(this.inputType)) { + return JobFactory.getQuadTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath); + } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) { + return JobFactory.getTripleTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath); + } else { + return JobFactory.getTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath); + } + } +} 
http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java b/hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java new file mode 100644 index 0000000..10ba65d --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java @@ -0,0 +1,757 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.stats.jobs; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.SequenceFile.CompressionType; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.BZip2Codec; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; +import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hadoop.util.StringUtils; +import org.apache.jena.hadoop.rdf.io.input.NQuadsInputFormat; +import org.apache.jena.hadoop.rdf.io.input.NTriplesInputFormat; +import org.apache.jena.hadoop.rdf.io.input.QuadsInputFormat; +import org.apache.jena.hadoop.rdf.io.input.TriplesInputFormat; +import org.apache.jena.hadoop.rdf.io.input.TriplesOrQuadsInputFormat; +import org.apache.jena.hadoop.rdf.io.output.NQuadsOutputFormat; +import org.apache.jena.hadoop.rdf.io.output.NTriplesNodeOutputFormat; +import org.apache.jena.hadoop.rdf.io.output.NTriplesOutputFormat; +import org.apache.jena.hadoop.rdf.mapreduce.KeyMapper; +import org.apache.jena.hadoop.rdf.mapreduce.RdfMapReduceConstants; +import org.apache.jena.hadoop.rdf.mapreduce.TextCountReducer; +import org.apache.jena.hadoop.rdf.mapreduce.characteristics.CharacteristicSetReducer; +import org.apache.jena.hadoop.rdf.mapreduce.characteristics.QuadCharacteristicSetGeneratingReducer; +import org.apache.jena.hadoop.rdf.mapreduce.characteristics.TripleCharacteristicSetGeneratingReducer; +import org.apache.jena.hadoop.rdf.mapreduce.count.NodeCountReducer; +import org.apache.jena.hadoop.rdf.mapreduce.count.QuadNodeCountMapper; +import org.apache.jena.hadoop.rdf.mapreduce.count.TripleNodeCountMapper; +import 
org.apache.jena.hadoop.rdf.mapreduce.count.datatypes.QuadDataTypeCountMapper; +import org.apache.jena.hadoop.rdf.mapreduce.count.datatypes.TripleDataTypeCountMapper; +import org.apache.jena.hadoop.rdf.mapreduce.count.namespaces.QuadNamespaceCountMapper; +import org.apache.jena.hadoop.rdf.mapreduce.count.namespaces.TripleNamespaceCountMapper; +import org.apache.jena.hadoop.rdf.mapreduce.count.positional.QuadObjectCountMapper; +import org.apache.jena.hadoop.rdf.mapreduce.count.positional.TripleObjectCountMapper; +import org.apache.jena.hadoop.rdf.mapreduce.filter.positional.QuadFilterByPredicateMapper; +import org.apache.jena.hadoop.rdf.mapreduce.filter.positional.TripleFilterByPredicateUriMapper; +import org.apache.jena.hadoop.rdf.mapreduce.group.QuadGroupBySubjectMapper; +import org.apache.jena.hadoop.rdf.mapreduce.group.TripleGroupBySubjectMapper; +import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable; +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; + +import com.hp.hpl.jena.vocabulary.RDF; + +/** + * Factory that can produce {@link Job} instances for computing various RDF + * statistics + * + * + * + */ +public class JobFactory { + + /** + * Private constructor prevents instantiation + */ + private JobFactory() { + } + + /** + * Gets a job for computing node counts on RDF triple inputs + * + * @param config + * Configuration + * @param inputPaths + * Input paths + * @param outputPath + * Output path + * @return Job + * @throws IOException + */ + public static Job getTripleNodeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException { + Job job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Triples Node Usage Count"); + + // Map/Reduce classes + job.setMapperClass(TripleNodeCountMapper.class); + job.setMapOutputKeyClass(NodeWritable.class); + job.setMapOutputValueClass(LongWritable.class); + job.setReducerClass(NodeCountReducer.class); + + // Input and Output + job.setInputFormatClass(TriplesInputFormat.class); + job.setOutputFormatClass(NTriplesNodeOutputFormat.class); + FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); + FileOutputFormat.setOutputPath(job, new Path(outputPath)); + + return job; + } + + /** + * Gets a job for computing node counts on RDF quad inputs + * + * @param config + * Configuration + * @param inputPaths + * Input paths + * @param outputPath + * Output path + * @return Job + * @throws IOException + */ + public static Job getQuadNodeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException { + Job job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Quads Node Usage Count"); + + // Map/Reduce classes + job.setMapperClass(QuadNodeCountMapper.class); + job.setMapOutputKeyClass(NodeWritable.class); + job.setMapOutputValueClass(LongWritable.class); + job.setReducerClass(NodeCountReducer.class); + + // Input and Output + job.setInputFormatClass(QuadsInputFormat.class); + job.setOutputFormatClass(NTriplesNodeOutputFormat.class); + FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); + FileOutputFormat.setOutputPath(job, new Path(outputPath)); + + return job; + } + + /** + * Gets a job for computing node counts on RDF triple and/or quad inputs + * + * @param config + * Configuration + * @param inputPaths + * Input paths + * @param outputPath + * Output 
path + * @return Job + * @throws IOException + */ + public static Job getNodeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException { + Job job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Node Usage Count"); + + // Map/Reduce classes + job.setMapperClass(QuadNodeCountMapper.class); + job.setMapOutputKeyClass(NodeWritable.class); + job.setMapOutputValueClass(LongWritable.class); + job.setReducerClass(NodeCountReducer.class); + + // Input and Output + job.setInputFormatClass(TriplesOrQuadsInputFormat.class); + job.setOutputFormatClass(NTriplesNodeOutputFormat.class); + FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); + FileOutputFormat.setOutputPath(job, new Path(outputPath)); + + return job; + } + + /** + * Gets a sequence of jobs that can be used to compute characteristic sets + * for RDF triples + * + * @param config + * Configuration + * @param inputPaths + * Input paths + * @param intermediateOutputPath + * Intermediate output path + * @param outputPath + * Final output path + * @return Sequence of jobs + * @throws IOException + */ + public static Job[] getTripleCharacteristicSetJobs(Configuration config, String[] inputPaths, String intermediateOutputPath, + String outputPath) throws IOException { + Job[] jobs = new Job[2]; + + Job job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Triples Characteristic Set (Generation)"); + + // Map/Reduce classes + job.setMapperClass(TripleGroupBySubjectMapper.class); + job.setMapOutputKeyClass(NodeWritable.class); + job.setMapOutputValueClass(TripleWritable.class); + job.setReducerClass(TripleCharacteristicSetGeneratingReducer.class); + job.setOutputKeyClass(CharacteristicSetWritable.class); + job.setOutputValueClass(NullWritable.class); + + // Input and Output + job.setInputFormatClass(TriplesInputFormat.class); + job.setOutputFormatClass(SequenceFileOutputFormat.class); + FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); + FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath)); + SequenceFileOutputFormat.setCompressOutput(job, true); + FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class); + SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); + + jobs[0] = job; + + job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Triples Characteristic Set (Reduction)"); + + // Map/Reduce classes + job.setMapperClass(KeyMapper.class); + job.setMapOutputKeyClass(CharacteristicSetWritable.class); + job.setMapOutputValueClass(CharacteristicSetWritable.class); + job.setReducerClass(CharacteristicSetReducer.class); + job.setOutputKeyClass(CharacteristicSetWritable.class); + job.setOutputValueClass(CharacteristicSetWritable.class); + + // Input and Output + job.setInputFormatClass(SequenceFileInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + FileInputFormat.setInputPaths(job, intermediateOutputPath); + FileOutputFormat.setOutputPath(job, new Path(outputPath)); + + jobs[1] = job; + return jobs; + } + + /** + * Gets a sequence of jobs that can be used to compute characteristic sets + * for RDF quads + * + * @param config + * Configuration + * @param inputPaths + * Input paths + * @param intermediateOutputPath + * Intermediate output path + * @param outputPath + * Final output path + * @return Sequence of jobs + * @throws IOException + */ + public static Job[] 
getQuadCharacteristicSetJobs(Configuration config, String[] inputPaths, String intermediateOutputPath, + String outputPath) throws IOException { + Job[] jobs = new Job[2]; + + Job job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Quads Characteristic Set (Generation)"); + + // Map/Reduce classes + job.setMapperClass(QuadGroupBySubjectMapper.class); + job.setMapOutputKeyClass(NodeWritable.class); + job.setMapOutputValueClass(QuadWritable.class); + job.setReducerClass(QuadCharacteristicSetGeneratingReducer.class); + job.setOutputKeyClass(CharacteristicSetWritable.class); + job.setOutputValueClass(NullWritable.class); + + // Input and Output + job.setInputFormatClass(QuadsInputFormat.class); + job.setOutputFormatClass(SequenceFileOutputFormat.class); + FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); + FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath)); + SequenceFileOutputFormat.setCompressOutput(job, true); + FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class); + SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); + + jobs[0] = job; + + job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Quads Characteristic Set (Reduction)"); + + // Map/Reduce classes + job.setMapperClass(KeyMapper.class); + job.setMapOutputKeyClass(CharacteristicSetWritable.class); + job.setMapOutputValueClass(CharacteristicSetWritable.class); + job.setReducerClass(CharacteristicSetReducer.class); + job.setOutputKeyClass(CharacteristicSetWritable.class); + job.setOutputValueClass(CharacteristicSetWritable.class); + + // Input and Output + job.setInputFormatClass(SequenceFileInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + FileInputFormat.setInputPaths(job, intermediateOutputPath); + FileOutputFormat.setOutputPath(job, new Path(outputPath)); + + jobs[1] = job; + return jobs; + } + + /** + * Gets a sequence of jobs that can be used to compute characteristic sets + * for RDF triple and/or quad inputs + * + * @param config + * Configuration + * @param inputPaths + * Input paths + * @param intermediateOutputPath + * Intermediate output path + * @param outputPath + * Final output path + * @return Sequence of jobs + * @throws IOException + */ + public static Job[] getCharacteristicSetJobs(Configuration config, String[] inputPaths, String intermediateOutputPath, + String outputPath) throws IOException { + Job[] jobs = new Job[2]; + + Job job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Characteristic Set (Generation)"); + + // Map/Reduce classes + job.setMapperClass(QuadGroupBySubjectMapper.class); + job.setMapOutputKeyClass(NodeWritable.class); + job.setMapOutputValueClass(QuadWritable.class); + job.setReducerClass(QuadCharacteristicSetGeneratingReducer.class); + job.setOutputKeyClass(CharacteristicSetWritable.class); + job.setOutputValueClass(NullWritable.class); + + // Input and Output + job.setInputFormatClass(TriplesOrQuadsInputFormat.class); + job.setOutputFormatClass(SequenceFileOutputFormat.class); + FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); + FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath)); + SequenceFileOutputFormat.setCompressOutput(job, true); + FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class); + SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); + + jobs[0] = job; + + job = 
Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Characteristic Set (Reduction)"); + + // Map/Reduce classes + job.setMapperClass(KeyMapper.class); + job.setMapOutputKeyClass(CharacteristicSetWritable.class); + job.setMapOutputValueClass(CharacteristicSetWritable.class); + job.setReducerClass(CharacteristicSetReducer.class); + job.setOutputKeyClass(CharacteristicSetWritable.class); + job.setOutputValueClass(CharacteristicSetWritable.class); + + // Input and Output + job.setInputFormatClass(SequenceFileInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + FileInputFormat.setInputPaths(job, intermediateOutputPath); + FileOutputFormat.setOutputPath(job, new Path(outputPath)); + + jobs[1] = job; + return jobs; + } + + /** + * Gets a job for computing type counts on RDF triple inputs + * + * @param config + * Configuration + * @param inputPaths + * Input paths + * @param intermediateOutputPath + * Path for intermediate output which will be all the type + * declaration triples present in the inputs + * @param outputPath + * Output path + * @return Job + * @throws IOException + */ + public static Job[] getTripleTypeCountJobs(Configuration config, String[] inputPaths, String intermediateOutputPath, + String outputPath) throws IOException { + Job[] jobs = new Job[2]; + + Job job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Type Triples Extraction"); + + // Map/Reduce classes + job.getConfiguration().setStrings(RdfMapReduceConstants.FILTER_PREDICATE_URIS, RDF.type.getURI()); + job.setMapperClass(TripleFilterByPredicateUriMapper.class); + job.setMapOutputKeyClass(LongWritable.class); + job.setMapOutputValueClass(TripleWritable.class); + + // Input and Output Format + job.setInputFormatClass(TriplesInputFormat.class); + job.setOutputFormatClass(NTriplesOutputFormat.class); + FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); + FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath)); + + jobs[0] = job; + + // Object Node Usage count job + job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Triples Type Usage Count"); + + // Map/Reduce classes + job.setMapperClass(TripleObjectCountMapper.class); + job.setMapOutputKeyClass(NodeWritable.class); + job.setMapOutputValueClass(LongWritable.class); + job.setReducerClass(NodeCountReducer.class); + + // Input and Output + job.setInputFormatClass(NTriplesInputFormat.class); + NLineInputFormat.setNumLinesPerSplit(job, 10000); // TODO Would be + // better if this was + // intelligently + // configured + job.setOutputFormatClass(NTriplesNodeOutputFormat.class); + FileInputFormat.setInputPaths(job, intermediateOutputPath); + FileOutputFormat.setOutputPath(job, new Path(outputPath)); + + jobs[1] = job; + + return jobs; + } + + /** + * Gets a job for computing type counts on RDF quad inputs + * + * @param config + * Configuration + * @param inputPaths + * Input paths + * @param intermediateOutputPath + * Path for intermediate output which will be all the type + * declaration quads present in the inputs + * @param outputPath + * Output path + * @return Job + * @throws IOException + */ + public static Job[] getQuadTypeCountJobs(Configuration config, String[] inputPaths, String intermediateOutputPath, + String outputPath) throws IOException { + Job[] jobs = new Job[2]; + + Job job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Type Quads Extraction"); + + 
// Map/Reduce classes + job.getConfiguration().setStrings(RdfMapReduceConstants.FILTER_PREDICATE_URIS, RDF.type.getURI()); + job.setMapperClass(QuadFilterByPredicateMapper.class); + job.setMapOutputKeyClass(LongWritable.class); + job.setMapOutputValueClass(QuadWritable.class); + + // Input and Output Format + job.setInputFormatClass(QuadsInputFormat.class); + job.setOutputFormatClass(NQuadsOutputFormat.class); + FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); + FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath)); + + jobs[0] = job; + + // Object Node Usage count job + job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Quads Type Usage Count"); + + // Map/Reduce classes + job.setMapperClass(QuadObjectCountMapper.class); + job.setMapOutputKeyClass(NodeWritable.class); + job.setMapOutputValueClass(LongWritable.class); + job.setReducerClass(NodeCountReducer.class); + + // Input and Output + job.setInputFormatClass(NQuadsInputFormat.class); + NLineInputFormat.setNumLinesPerSplit(job, 10000); // TODO Would be + // better if this was + // intelligently + // configured + job.setOutputFormatClass(NTriplesNodeOutputFormat.class); + FileInputFormat.setInputPaths(job, intermediateOutputPath); + FileOutputFormat.setOutputPath(job, new Path(outputPath)); + + jobs[1] = job; + + return jobs; + } + + /** + * Gets a job for computing type counts on RDF triple and/or quad inputs + * + * @param config + * Configuration + * @param inputPaths + * Input paths + * @param intermediateOutputPath + * Path for intermediate output which will be all the type + * declaration quads present in the inputs + * @param outputPath + * Output path + * @return Job + * @throws IOException + */ + public static Job[] getTypeCountJobs(Configuration config, String[] inputPaths, String intermediateOutputPath, + String outputPath) throws IOException { + Job[] jobs = new Job[2]; + + Job job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Type Extraction"); + + // Map/Reduce classes + job.getConfiguration().setStrings(RdfMapReduceConstants.FILTER_PREDICATE_URIS, RDF.type.getURI()); + job.setMapperClass(QuadFilterByPredicateMapper.class); + job.setMapOutputKeyClass(LongWritable.class); + job.setMapOutputValueClass(QuadWritable.class); + + // Input and Output Format + job.setInputFormatClass(TriplesOrQuadsInputFormat.class); + job.setOutputFormatClass(NQuadsOutputFormat.class); + FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); + FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath)); + + jobs[0] = job; + + // Object Node Usage count job + job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Type Usage Count"); + + // Map/Reduce classes + job.setMapperClass(QuadObjectCountMapper.class); + job.setMapOutputKeyClass(NodeWritable.class); + job.setMapOutputValueClass(LongWritable.class); + job.setReducerClass(NodeCountReducer.class); + + // Input and Output + job.setInputFormatClass(NQuadsInputFormat.class); + NLineInputFormat.setNumLinesPerSplit(job, 10000); // TODO Would be + // better if this was + // intelligently + // configured + job.setOutputFormatClass(NTriplesNodeOutputFormat.class); + FileInputFormat.setInputPaths(job, intermediateOutputPath); + FileOutputFormat.setOutputPath(job, new Path(outputPath)); + + jobs[1] = job; + + return jobs; + } + + /** + * Gets a job for computing literal data type counts on RDF triple inputs + * + * 
@param config + * Configuration + * @param inputPaths + * Input paths + * @param outputPath + * Output path + * @return Job + * @throws IOException + */ + public static Job getTripleDataTypeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException { + Job job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Triples Literal Data Type Usage Count"); + + // Map/Reduce classes + job.setMapperClass(TripleDataTypeCountMapper.class); + job.setMapOutputKeyClass(NodeWritable.class); + job.setMapOutputValueClass(LongWritable.class); + job.setReducerClass(NodeCountReducer.class); + + // Input and Output + job.setInputFormatClass(TriplesInputFormat.class); + job.setOutputFormatClass(NTriplesNodeOutputFormat.class); + FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); + FileOutputFormat.setOutputPath(job, new Path(outputPath)); + + return job; + } + + /** + * Gets a job for computing literal data type counts on RDF quad inputs + * + * @param config + * Configuration + * @param inputPaths + * Input paths + * @param outputPath + * Output path + * @return Job + * @throws IOException + */ + public static Job getQuadDataTypeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException { + Job job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Quads Literal Data Type Usage Count"); + + // Map/Reduce classes + job.setMapperClass(QuadDataTypeCountMapper.class); + job.setMapOutputKeyClass(NodeWritable.class); + job.setMapOutputValueClass(LongWritable.class); + job.setReducerClass(NodeCountReducer.class); + + // Input and Output + job.setInputFormatClass(QuadsInputFormat.class); + job.setOutputFormatClass(NTriplesNodeOutputFormat.class); + FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); + FileOutputFormat.setOutputPath(job, new Path(outputPath)); + + return job; + } + + /** + * Gets a job for computing literal data type counts on RDF triple and/or + * quad inputs + * + * @param config + * Configuration + * @param inputPaths + * Input paths + * @param outputPath + * Output path + * @return Job + * @throws IOException + */ + public static Job getDataTypeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException { + Job job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Literal Data Type Usage Count"); + + // Map/Reduce classes + job.setMapperClass(QuadDataTypeCountMapper.class); + job.setMapOutputKeyClass(NodeWritable.class); + job.setMapOutputValueClass(LongWritable.class); + job.setReducerClass(NodeCountReducer.class); + + // Input and Output + job.setInputFormatClass(TriplesOrQuadsInputFormat.class); + job.setOutputFormatClass(NTriplesNodeOutputFormat.class); + FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); + FileOutputFormat.setOutputPath(job, new Path(outputPath)); + + return job; + } + + /** + * Gets a job for computing literal data type counts on RDF triple inputs + * + * @param config + * Configuration + * @param inputPaths + * Input paths + * @param outputPath + * Output path + * @return Job + * @throws IOException + */ + public static Job getTripleNamespaceCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException { + Job job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Triples Namespace Usage Count"); + + // Map/Reduce classes + 
job.setMapperClass(TripleNamespaceCountMapper.class); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(LongWritable.class); + job.setReducerClass(TextCountReducer.class); + + // Input and Output + job.setInputFormatClass(TriplesInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); + FileOutputFormat.setOutputPath(job, new Path(outputPath)); + + return job; + } + + /** + * Gets a job for computing namespace usage counts on RDF quad inputs + * + * @param config + * Configuration + * @param inputPaths + * Input paths + * @param outputPath + * Output path + * @return Job + * @throws IOException + */ + public static Job getQuadNamespaceCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException { + Job job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Quads Namespace Usage Count"); + + // Map/Reduce classes + job.setMapperClass(QuadNamespaceCountMapper.class); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(LongWritable.class); + job.setReducerClass(TextCountReducer.class); + + // Input and Output + job.setInputFormatClass(QuadsInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); + FileOutputFormat.setOutputPath(job, new Path(outputPath)); + + return job; + } + + /** + * Gets a job for computing namespace usage counts on RDF triple and/or + * quad inputs + * + * @param config + * Configuration + * @param inputPaths + * Input paths + * @param outputPath + * Output path + * @return Job + * @throws IOException + */ + public static Job getNamespaceCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException { + Job job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Namespace Usage Count"); + + // Map/Reduce classes + job.setMapperClass(QuadNamespaceCountMapper.class); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(LongWritable.class); + job.setReducerClass(TextCountReducer.class); + + // Input and Output + job.setInputFormatClass(TriplesOrQuadsInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); + FileOutputFormat.setOutputPath(job, new Path(outputPath)); + + return job; + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-rdf/pom.xml b/hadoop-rdf/pom.xml new file mode 100644 index 0000000..30a6fbd --- /dev/null +++ b/hadoop-rdf/pom.xml @@ -0,0 +1,142 @@ +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor + license agreements. See the NOTICE file distributed with this work for additional + information regarding copyright ownership. The ASF licenses this file to + You under the Apache License, Version 2.0 (the "License"); you may not use + this file except in compliance with the License. You may obtain a copy of + the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required + by applicable law or agreed to in writing, software distributed under the + License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS + OF ANY KIND, either express or implied. 
See the License for the specific + language governing permissions and limitations under the License. --> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <groupId>org.apache.jena</groupId> + <artifactId>jena-hadoop-rdf</artifactId> + <version>0.9.0-SNAPSHOT</version> + <packaging>pom</packaging> + + <!-- <parent> <groupId>org.apache.jena</groupId> <artifactId>jena-parent</artifactId> + <version>10-SNAPSHOT</version> <relativePath /> </parent> --> + + <name>Apache Jena - RDF Tools for Hadoop</name> + <description>A collection of tools for working with RDF on the Hadoop platform</description> + + <modules> + <module>hadoop-rdf-io</module> + <module>hadoop-rdf-common</module> + <module>hadoop-rdf-mapreduce</module> + <module>hadoop-rdf-stats</module> + </modules> + + <!-- Properties common across all profiles --> + <properties> + <plugin.compiler.version>2.5.1</plugin.compiler.version> + <arq.version>2.12.1-SNAPSHOT</arq.version> + <junit.version>4.11</junit.version> + <mrunit.version>1.0.0</mrunit.version> + </properties> + + <!-- Profiles to allow building for different Hadoop versions --> + <!-- Currently there is only a single profile targeting Hadoop 2.x because + we're using the newer MRv2 APIs which aren't backwards compatible with Hadoop + 1.x versions --> + <profiles> + <!-- Hadoop 2.x Stable --> + <profile> + <id>hadoop_2x</id> + <activation> + <activeByDefault>true</activeByDefault> + </activation> + <properties> + <hadoop.version>2.4.1</hadoop.version> + </properties> + </profile> + + <!-- Hadoop 0.23 --> + <profile> + <id>hadoop_023x</id> + <properties> + <hadoop.version>0.23.9</hadoop.version> + </properties> + </profile> + </profiles> + + <dependencyManagement> + <dependencies> + <!-- Hadoop Dependencies --> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <version>${hadoop.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-mapreduce-client-common</artifactId> + <version>${hadoop.version}</version> + </dependency> + + <!-- Jena Dependencies --> + <dependency> + <groupId>org.apache.jena</groupId> + <artifactId>jena-arq</artifactId> + <version>${arq.version}</version> + </dependency> + + <!-- Test Dependencies --> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-minicluster</artifactId> + <version>${hadoop.version}</version> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>${junit.version}</version> + </dependency> + <dependency> + <groupId>org.apache.mrunit</groupId> + <artifactId>mrunit</artifactId> + <version>${mrunit.version}</version> + <classifier>hadoop2</classifier> + </dependency> + </dependencies> + </dependencyManagement> + + <build> + <plugins> + <plugin> + <artifactId>maven-compiler-plugin</artifactId> + <version>${plugin.compiler.version}</version> + <configuration> + <source>1.7</source> + <target>1.7</target> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-enforcer-plugin</artifactId> + <executions> + <execution> + <id>enforce-java</id> + <goals> + <goal>enforce</goal> + </goals> + <configuration> + <rules> + <requireJavaVersion> + <version>1.7.0</version> + </requireJavaVersion> + </rules> + <!-- Hadoop dependencies introduce a 
huge range of dependency convergence + issues. Therefore we don't fail the builds for these modules which is less + than ideal but far easier than trying to fix the mess that is Hadoops transitive + dependencies --> + <fail>false</fail> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> +</project> http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml deleted file mode 100644 index 30a6fbd..0000000 --- a/pom.xml +++ /dev/null @@ -1,142 +0,0 @@ -<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor - license agreements. See the NOTICE file distributed with this work for additional - information regarding copyright ownership. The ASF licenses this file to - You under the Apache License, Version 2.0 (the "License"); you may not use - this file except in compliance with the License. You may obtain a copy of - the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required - by applicable law or agreed to in writing, software distributed under the - License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS - OF ANY KIND, either express or implied. See the License for the specific - language governing permissions and limitations under the License. --> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - <groupId>org.apache.jena</groupId> - <artifactId>jena-hadoop-rdf</artifactId> - <version>0.9.0-SNAPSHOT</version> - <packaging>pom</packaging> - - <!-- <parent> <groupId>org.apache.jena</groupId> <artifactId>jena-parent</artifactId> - <version>10-SNAPSHOT</version> <relativePath /> </parent> --> - - <name>Apache Jena - RDF Tools for Hadoop</name> - <description>A collection of tools for working with RDF on the Hadoop platform</description> - - <modules> - <module>hadoop-rdf-io</module> - <module>hadoop-rdf-common</module> - <module>hadoop-rdf-mapreduce</module> - <module>hadoop-rdf-stats</module> - </modules> - - <!-- Properties common across all profiles --> - <properties> - <plugin.compiler.version>2.5.1</plugin.compiler.version> - <arq.version>2.12.1-SNAPSHOT</arq.version> - <junit.version>4.11</junit.version> - <mrunit.version>1.0.0</mrunit.version> - </properties> - - <!-- Profiles to allow building for different Hadoop versions --> - <!-- Currently there is only a single profile targeting Hadoop 2.x because - we're using the newer MRv2 APIs which aren't backwards compatible with Hadoop - 1.x versions --> - <profiles> - <!-- Hadoop 2.x Stable --> - <profile> - <id>hadoop_2x</id> - <activation> - <activeByDefault>true</activeByDefault> - </activation> - <properties> - <hadoop.version>2.4.1</hadoop.version> - </properties> - </profile> - - <!-- Hadoop 0.23 --> - <profile> - <id>hadoop_023x</id> - <properties> - <hadoop.version>0.23.9</hadoop.version> - </properties> - </profile> - </profiles> - - <dependencyManagement> - <dependencies> - <!-- Hadoop Dependencies --> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-common</artifactId> - <version>${hadoop.version}</version> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-mapreduce-client-common</artifactId> - <version>${hadoop.version}</version> - </dependency> - - <!-- Jena 
Dependencies --> - <dependency> - <groupId>org.apache.jena</groupId> - <artifactId>jena-arq</artifactId> - <version>${arq.version}</version> - </dependency> - - <!-- Test Dependencies --> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-minicluster</artifactId> - <version>${hadoop.version}</version> - </dependency> - <dependency> - <groupId>junit</groupId> - <artifactId>junit</artifactId> - <version>${junit.version}</version> - </dependency> - <dependency> - <groupId>org.apache.mrunit</groupId> - <artifactId>mrunit</artifactId> - <version>${mrunit.version}</version> - <classifier>hadoop2</classifier> - </dependency> - </dependencies> - </dependencyManagement> - - <build> - <plugins> - <plugin> - <artifactId>maven-compiler-plugin</artifactId> - <version>${plugin.compiler.version}</version> - <configuration> - <source>1.7</source> - <target>1.7</target> - </configuration> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-enforcer-plugin</artifactId> - <executions> - <execution> - <id>enforce-java</id> - <goals> - <goal>enforce</goal> - </goals> - <configuration> - <rules> - <requireJavaVersion> - <version>1.7.0</version> - </requireJavaVersion> - </rules> - <!-- Hadoop dependencies introduce a huge range of dependency convergence - issues. Therefore we don't fail the builds for these modules which is less - than ideal but far easier than trying to fix the mess that is Hadoops transitive - dependencies --> - <fail>false</fail> - </configuration> - </execution> - </executions> - </plugin> - </plugins> - </build> -</project>
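
For context, a minimal driver sketch (not part of the commit above) showing how the two chained jobs returned by JobFactory.getTypeCountJobs() might be submitted. The class name TypeCountDriver and the example paths are purely illustrative, and the driver is assumed to live in the same package as JobFactory.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

// Hypothetical driver: job 0 filters the inputs down to rdf:type quads
// written to the intermediate path, job 1 counts how often each type
// (the object node of those quads) occurs in that intermediate output.
public class TypeCountDriver {

    public static void main(String[] args) throws Exception {
        Configuration config = new Configuration();

        // Example paths, purely illustrative
        String[] inputs = new String[] { "/data/rdf/input" };
        String intermediate = "/data/rdf/type-quads";
        String output = "/data/rdf/type-counts";

        Job[] jobs = JobFactory.getTypeCountJobs(config, inputs, intermediate, output);

        // The second job consumes the first job's output, so the jobs must run
        // sequentially and the pipeline should abort if any stage fails
        for (Job job : jobs) {
            if (!job.waitForCompletion(true)) {
                System.exit(1);
            }
        }
    }
}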
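
On the build side, the new hadoop-rdf parent POM activates the hadoop_2x profile (Hadoop 2.4.1) by default via activeByDefault, so a plain build targets Hadoop 2.x. Building against Hadoop 0.23 would presumably be done by selecting the other profile explicitly, for example:

mvn clean install -Phadoop_023x

(an illustrative invocation, not taken from the commit).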