http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupBySubjectMapper.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupBySubjectMapper.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupBySubjectMapper.java new file mode 100644 index 0000000..73809e8 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupBySubjectMapper.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce.group; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; + +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * A mapper which assists in grouping quads by subject by reassigning their keys + * to be their subjects + * + * <p>The new key is the quad's subject wrapped in a {@link NodeWritable}. + * + * @param <TKey> Key type + */ +public class QuadGroupBySubjectMapper<TKey> extends AbstractQuadGroupingMapper<TKey> { + + @Override + protected NodeWritable selectKey(Quad quad) { + return new NodeWritable(quad.getSubject()); + } +}
http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByObjectMapper.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByObjectMapper.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByObjectMapper.java new file mode 100644 index 0000000..9fde939 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByObjectMapper.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce.group; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; + +import com.hp.hpl.jena.graph.Triple; + +/** + * A mapper which assists in grouping triples by object by reassigning their + * keys to be their objects + * + * <p>The new key is the triple's object wrapped in a {@link NodeWritable}. + * + * @param <TKey> + * Key type + */ +public class TripleGroupByObjectMapper<TKey> extends AbstractTripleGroupingMapper<TKey> { + + @Override + protected NodeWritable selectKey(Triple triple) { + return new NodeWritable(triple.getObject()); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByPredicateMapper.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByPredicateMapper.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByPredicateMapper.java new file mode 100644 index 0000000..dd15ef5 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByPredicateMapper.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.group; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; + +import com.hp.hpl.jena.graph.Triple; + +/** + * A mapper which assists in grouping triples by predicate by reassigning their + * keys to be their predicates + * + * <p>The new key is the triple's predicate wrapped in a {@link NodeWritable}. + * + * @param <TKey> + * Key type + */ +public class TripleGroupByPredicateMapper<TKey> extends AbstractTripleGroupingMapper<TKey> { + + @Override + protected NodeWritable selectKey(Triple triple) { + return new NodeWritable(triple.getPredicate()); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupBySubjectMapper.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupBySubjectMapper.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupBySubjectMapper.java new file mode 100644 index 0000000..f1116c1 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupBySubjectMapper.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.group; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; + +import com.hp.hpl.jena.graph.Triple; + +/** + * A mapper which assists in grouping triples by subject by reassigning their + * keys to be their subjects + * + * <p>The new key is the triple's subject wrapped in a {@link NodeWritable}. + * + * @param <TKey> + * Key type + */ +public class TripleGroupBySubjectMapper<TKey> extends AbstractTripleGroupingMapper<TKey> { + + @Override + protected NodeWritable selectKey(Triple triple) { + return new NodeWritable(triple.getSubject()); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitToNodesMapper.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitToNodesMapper.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitToNodesMapper.java new file mode 100644 index 0000000..840d78c --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitToNodesMapper.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.split; + +import java.io.IOException; + +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; +import org.apache.jena.hadoop.rdf.types.NodeWritable; + + +/** + * Abstract mapper implementation which splits the tuples into their constituent + * nodes preserving the keys as-is + * + * <p>Every node returned by {@code split} is written as its own output record under the unchanged input key. + * + * @param <TKey> + * Key type + * @param <TValue> + * Tuple type + * @param <T> + * Writable tuple type + */ +public abstract class AbstractNodeTupleSplitToNodesMapper<TKey, TValue, T extends AbstractNodeTupleWritable<TValue>> extends + Mapper<TKey, T, TKey, NodeWritable> { + + @Override + protected final void map(TKey key, T value, Context context) throws IOException, InterruptedException { + NodeWritable[] ns = this.split(value); + for (NodeWritable n : ns) { + context.write(key, n); + } + } + + /** + * Splits the given tuple into its individual nodes + * + * @param tuple + * Tuple to split + * @return Nodes, each of which is written to the output in array order + */ + protected abstract NodeWritable[] split(T tuple); +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitWithNodesMapper.java ---------------------------------------------------------------------- diff --git
a/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitWithNodesMapper.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitWithNodesMapper.java new file mode 100644 index 0000000..7dc85fd --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitWithNodesMapper.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce.split; + +import java.io.IOException; + +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; +import org.apache.jena.hadoop.rdf.types.NodeWritable; + + +/** + * Abstract mapper implementation which splits the tuples into their constituent + * nodes using the tuples as the keys and the nodes as the values + * + * <p>Every node returned by {@code split} is written as its own output record keyed by the input tuple; the input key is not written. + * + * @param <TKey> + * Key type + * @param <TValue> + * Tuple type + * @param <T> + * Writable tuple type + */ +public abstract class AbstractNodeTupleSplitWithNodesMapper<TKey, TValue, T extends AbstractNodeTupleWritable<TValue>> extends + Mapper<TKey, T, T, NodeWritable> { + + @Override + protected void map(TKey key, T value, Context context) throws IOException, InterruptedException { + NodeWritable[] ns = this.split(value); + for (NodeWritable n : ns) { + context.write(value, n); + } + } + + /** + * Splits the given tuple into its individual nodes + * + * @param tuple + * Tuple to split + * @return Nodes, each of which is written to the output in array order + */ + protected abstract NodeWritable[] split(T tuple); +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapper.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapper.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapper.java new file mode 100644 index 0000000..c993810 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapper.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.split; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.QuadWritable; + +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * A mapper which splits quads into their constituent nodes preserving the + * existing keys as-is + * + * <p>Nodes are produced in graph, subject, predicate, object order. + * + * @param <TKey> + * Key type + */ +public class QuadSplitToNodesMapper<TKey> extends AbstractNodeTupleSplitToNodesMapper<TKey, Quad, QuadWritable> { + + @Override + protected NodeWritable[] split(QuadWritable tuple) { + Quad q = tuple.get(); + return new NodeWritable[] { new NodeWritable(q.getGraph()), new NodeWritable(q.getSubject()), + new NodeWritable(q.getPredicate()), new NodeWritable(q.getObject()) }; + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapper.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapper.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapper.java new file mode 100644 index 0000000..09caef6
--- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapper.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.split; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.QuadWritable; + +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * A mapper which splits quads into their constituent nodes using the quad as + * the key and the nodes as the values + * + * <p>Nodes are produced in graph, subject, predicate, object order. + * + * @param <TKey> + * Key type + */ +public class QuadSplitWithNodesMapper<TKey> extends AbstractNodeTupleSplitWithNodesMapper<TKey, Quad, QuadWritable> { + + @Override + protected NodeWritable[] split(QuadWritable tuple) { + Quad q = tuple.get(); + return new NodeWritable[] { new NodeWritable(q.getGraph()), new NodeWritable(q.getSubject()), + new NodeWritable(q.getPredicate()), new NodeWritable(q.getObject()) }; + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapper.java ---------------------------------------------------------------------- diff
--git a/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapper.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapper.java new file mode 100644 index 0000000..0ef02d9 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapper.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce.split; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; + +import com.hp.hpl.jena.graph.Triple; + +/** + * A mapper which splits triples into their constituent nodes preserving the + * existing keys as-is + * + * <p>Nodes are produced in subject, predicate, object order. + * @param <TKey> Key type + */ +public class TripleSplitToNodesMapper<TKey> extends AbstractNodeTupleSplitToNodesMapper<TKey, Triple, TripleWritable> { + + @Override + protected NodeWritable[] split(TripleWritable tuple) { + Triple t = tuple.get(); + return new NodeWritable[] { new NodeWritable(t.getSubject()), new NodeWritable(t.getPredicate()), + new NodeWritable(t.getObject()) }; + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapper.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapper.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapper.java new file mode 100644 index 0000000..7b18f55 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapper.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.split; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; + +import com.hp.hpl.jena.graph.Triple; + +/** + * A mapper which splits triples into their constituent nodes using the triple + * as the key and the nodes as the values + * + * <p>Nodes are produced in subject, predicate, object order. + * @param <TKey> Key type + */ +public class TripleSplitWithNodesMapper<TKey> extends AbstractNodeTupleSplitWithNodesMapper<TKey, Triple, TripleWritable> { + + @Override + protected NodeWritable[] split(TripleWritable tuple) { + Triple t = tuple.get(); + return new NodeWritable[] { new NodeWritable(t.getSubject()), new NodeWritable(t.getPredicate()), + new NodeWritable(t.getObject()) }; + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/AbstractTriplesToQuadsMapper.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/AbstractTriplesToQuadsMapper.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/AbstractTriplesToQuadsMapper.java new file mode 100644 index 0000000..76137fe --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/AbstractTriplesToQuadsMapper.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.transform; + +import java.io.IOException; + +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; + +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.Triple; +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * An abstract mapper which transforms triples into quads. Derived + * implementations may choose how the graph to which triples are assigned is + * decided. + * <p> + * Keys are left as is by this mapper. 
+ * </p> + * + * + * + * @param <TKey> + */ +public abstract class AbstractTriplesToQuadsMapper<TKey> extends Mapper<TKey, TripleWritable, TKey, QuadWritable> { + + @Override + protected final void map(TKey key, TripleWritable value, Context context) throws IOException, InterruptedException { + Triple triple = value.get(); + Node graphNode = this.selectGraph(triple); + context.write(key, new QuadWritable(new Quad(graphNode, triple))); + } + + /** + * Selects the graph name to use for converting the given triple into a quad + * + * @param triple + * Triple being converted + * @return Graph node to use as the graph of the generated quad + */ + protected abstract Node selectGraph(Triple triple); +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapper.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapper.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapper.java new file mode 100644 index 0000000..048e669 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapper.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.transform; + +import java.io.IOException; + +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; + + +/** + * A mapper which transforms quads into triples + * <p> + * Keys are left as is by this mapper. + * </p> + * + * <p>Each quad is converted using {@code Quad.asTriple()} and re-wrapped in a {@link TripleWritable}. + * + * @param <TKey> + * Key type + */ +public class QuadsToTriplesMapper<TKey> extends Mapper<TKey, QuadWritable, TKey, TripleWritable> { + + @Override + protected void map(TKey key, QuadWritable value, Context context) throws IOException, InterruptedException { + context.write(key, new TripleWritable(value.get().asTriple())); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapper.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapper.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapper.java new file mode 100644 index 0000000..394d5fd --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapper.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.transform; + +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.Triple; + +/** + * A mapper which converts triples into quads using the subjects of the triples + * as the graph nodes + * + * <p>{@code selectGraph} is final in this class, so subclasses cannot change the graph selection. + * @param <TKey> + * Key type + * + */ +public class TriplesToQuadsBySubjectMapper<TKey> extends AbstractTriplesToQuadsMapper<TKey> { + + @Override + protected final Node selectGraph(Triple triple) { + return triple.getSubject(); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapper.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapper.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapper.java new file mode 100644 index 0000000..ef19edf --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapper.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.transform; + +import java.io.IOException; + +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.Triple; +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * A mapper which converts triples to quads where all triples are placed in the + * same graph + * + * <p>The graph node is obtained from {@link #getGraphNode()} during setup and cached in a field. + * + * @param <TKey> + * Key type + */ +public class TriplesToQuadsConstantGraphMapper<TKey> extends AbstractTriplesToQuadsMapper<TKey> { + + private Node graphNode; + + @Override + protected void setup(Context context) throws IOException, InterruptedException { + super.setup(context); + this.graphNode = this.getGraphNode(); + } + + /** + * Gets the graph node that will be used for all quads, this will be called + * once and only once during the + * {@link #setup(org.apache.hadoop.mapreduce.Mapper.Context)} method and the + * value returned cached for use throughout the lifetime of this mapper. + * <p> + * This implementation always uses the default graph as the graph for + * generated quads. You can override this method in your own derived + * implementation to put triples into a different graph than the default + * graph.
+ * </p> + * <p> + * If instead you wanted to select different graphs for each triple you + * should extend {@link AbstractTriplesToQuadsMapper} instead and override + * the {@link #selectGraph(Triple)} method which is sealed in this + * implementation. + * </p> + * + * @return + */ + protected Node getGraphNode() { + return Quad.defaultGraphNodeGenerated; + } + + @Override + protected final Node selectGraph(Triple triple) { + return this.graphNode; + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapReduceTests.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapReduceTests.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapReduceTests.java new file mode 100644 index 0000000..32c40f7 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapReduceTests.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce; + +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver; + +/** + * Abstract tests for mappers + * + * + * @param <TKey> + * Mapper input key type + * @param <TValue> + * Mapper input value type + * @param <TIntermediateKey> + * Mapper output/Reducer input key type + * @param <TIntermediateValue> + * Mapper output/Reducer input value type + * @param <TReducedKey> + * Reducer output key type + * @param <TReducedValue> + * Reducer output value type + * + * + */ +public abstract class AbstractMapReduceTests<TKey, TValue, TIntermediateKey, TIntermediateValue, TReducedKey, TReducedValue> { + + /** + * Gets the mapper instance to test + * + * @return Mapper instance + */ + protected abstract Mapper<TKey, TValue, TIntermediateKey, TIntermediateValue> getMapperInstance(); + + /** + * Gets the reducer instance to test + * + * @return Reducer instance + */ + protected abstract Reducer<TIntermediateKey, TIntermediateValue, TReducedKey, TReducedValue> getReducerInstance(); + + /** + * Gets a map reduce driver that can be used to create a test case + * + * @return Map reduce driver + */ + protected MapReduceDriver<TKey, TValue, TIntermediateKey, TIntermediateValue, TReducedKey, TReducedValue> getMapReduceDriver() { + return new MapReduceDriver<TKey, TValue, TIntermediateKey, TIntermediateValue, TReducedKey, TReducedValue>( + this.getMapperInstance(), this.getReducerInstance()); + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapperTests.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapperTests.java 
b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapperTests.java new file mode 100644 index 0000000..b40cbea --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapperTests.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce; + +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mrunit.mapreduce.MapDriver; + +/** + * Abstract tests for mappers + * + * + * @param <TKeyIn> + * Input key type + * @param <TValueIn> + * Input value type + * @param <TKeyOut> + * Output key type + * @param <TValueOut> + * Output value type + * + */ +public abstract class AbstractMapperTests<TKeyIn, TValueIn, TKeyOut, TValueOut> { + + /** + * Gets the mapper instance to test + * + * @return Mapper instance + */ + protected abstract Mapper<TKeyIn, TValueIn, TKeyOut, TValueOut> getInstance(); + + /** + * Gets a map driver that can be used to create a test case + * + * @return Map driver + */ + protected MapDriver<TKeyIn, TValueIn, TKeyOut, TValueOut> getMapDriver() { + MapDriver<TKeyIn, TValueIn, TKeyOut, TValueOut> driver = new MapDriver<TKeyIn, TValueIn, TKeyOut, TValueOut>(this.getInstance()); + this.configureDriver(driver); + return driver; + } + + /** + * Method that may be overridden by test harnesses which need to configure the driver in more detail e.g. 
add configuration keys + * @param driver Driver + */ + protected void configureDriver(MapDriver<TKeyIn, TValueIn, TKeyOut, TValueOut> driver) { + // Does nothing + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java new file mode 100644 index 0000000..ca1ccb0 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java @@ -0,0 +1,114 @@ +package org.apache.jena.hadoop.rdf.mapreduce; + +import java.io.IOException; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.junit.Assert; +import org.junit.Test; + +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.NodeFactory; +import com.hp.hpl.jena.graph.Triple; + +public class TestDistinctTriples + extends + AbstractMapReduceTests<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> { + + @Override + protected Mapper<LongWritable, TripleWritable, TripleWritable, NullWritable> getMapperInstance() { + return new ValuePlusNullMapper<LongWritable, TripleWritable>(); + } + + @Override + protected Reducer<TripleWritable, NullWritable, NullWritable, TripleWritable> getReducerInstance() { + return new NullPlusKeyReducer<TripleWritable, 
NullWritable>(); + } + + @Test + public void distinct_triples_01() throws IOException { + MapReduceDriver<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> driver = this + .getMapReduceDriver(); + + Triple t = new Triple(NodeFactory.createURI("urn:s"), NodeFactory.createURI("urn:p"), + NodeFactory.createLiteral("1")); + TripleWritable tw = new TripleWritable(t); + driver.addInput(new LongWritable(1), tw); + driver.addOutput(NullWritable.get(), tw); + + driver.runTest(); + } + + @Test + public void distinct_triples_02() throws IOException { + MapReduceDriver<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> driver = this + .getMapReduceDriver(); + + Triple t = new Triple(NodeFactory.createURI("urn:s"), NodeFactory.createURI("urn:p"), + NodeFactory.createLiteral("1")); + TripleWritable tw = new TripleWritable(t); + for (int i = 0; i < 100; i++) { + driver.addInput(new LongWritable(i), tw); + } + driver.addOutput(NullWritable.get(), tw); + + driver.runTest(); + } + + @Test + public void distinct_triples_03() throws IOException { + MapReduceDriver<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> driver = this + .getMapReduceDriver(); + + Triple t = new Triple(NodeFactory.createURI("urn:s"), NodeFactory.createURI("urn:p"), + NodeFactory.createLiteral("1")); + Triple t2 = new Triple(t.getSubject(), t.getPredicate(), NodeFactory.createLiteral("2")); + Assert.assertNotEquals(t, t2); + + TripleWritable tw = new TripleWritable(t); + TripleWritable tw2 = new TripleWritable(t2); + Assert.assertNotEquals(tw, tw2); + + driver.addInput(new LongWritable(1), tw); + driver.addInput(new LongWritable(2), tw2); + driver.addOutput(NullWritable.get(), tw); + driver.addOutput(NullWritable.get(), tw2); + + driver.runTest(false); + } + + @Test + public void distinct_triples_04() throws IOException { + MapReduceDriver<LongWritable, TripleWritable, TripleWritable, 
NullWritable, NullWritable, TripleWritable> driver = this + .getMapReduceDriver(); + + Node s1 = NodeFactory.createURI("urn:nf#cbf2b2c7-109e-4097-bbea-f67f272c7fcc"); + Node s2 = NodeFactory.createURI("urn:nf#bb08b75c-1ad2-47ef-acd2-eb2d92b94b89"); + Node p = NodeFactory.createURI("urn:p"); + Node o = NodeFactory.createURI("urn:66.230.159.118"); + Assert.assertNotEquals(s1, s2); + + Triple t1 = new Triple(s1, p, o); + Triple t2 = new Triple(s2, p, o); + Assert.assertNotEquals(t1, t2); + + TripleWritable tw1 = new TripleWritable(t1); + TripleWritable tw2 = new TripleWritable(t2); + Assert.assertNotEquals(tw1, tw2); + Assert.assertNotEquals(0, tw1.compareTo(tw2)); + + driver.addInput(new LongWritable(1), tw1); + driver.addInput(new LongWritable(2), tw2); + driver.addOutput(NullWritable.get(), tw1); + driver.addOutput(NullWritable.get(), tw2); + + driver.runTest(false); + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducerTests.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducerTests.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducerTests.java new file mode 100644 index 0000000..b2d0b92 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducerTests.java @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.characteristics; + +import java.io.IOException; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver; +import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapReduceTests; +import org.apache.jena.hadoop.rdf.mapreduce.characteristics.AbstractCharacteristicSetGeneratingReducer; +import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; +import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable; +import org.apache.jena.hadoop.rdf.types.CharacteristicWritable; +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.junit.Test; + +import com.hp.hpl.jena.graph.NodeFactory; + +/** + * Abstract tests for the {@link AbstractCharacteristicSetGeneratingReducer} + * + * + * + * @param <TValue> + * @param <T> + */ +public abstract class AbstractCharacteristicSetGeneratingReducerTests<TValue, T extends AbstractNodeTupleWritable<TValue>> + extends AbstractMapReduceTests<LongWritable, T, NodeWritable, T, CharacteristicSetWritable, NullWritable> { + + /** + * Create a tuple + * + * @param i + * Key to use in creating the subject + * @param predicateUri + * Predicate URI string + * @return Tuple + */ + protected abstract T createTuple(int i, String predicateUri); + + /** + * Creates a set consisting of the given predicates + * + * 
@param predicates + * Predicates + * @return Set + */ + protected CharacteristicSetWritable createSet(MapReduceDriver<LongWritable, T, NodeWritable, T, CharacteristicSetWritable, NullWritable> driver, int occurrences, String... predicates) { + CharacteristicSetWritable set = new CharacteristicSetWritable(); + for (String predicateUri : predicates) { + set.add(new CharacteristicWritable(NodeFactory.createURI(predicateUri))); + } + for (int i = 1; i <= occurrences; i++) { + driver.addOutput(set, NullWritable.get()); + } + return set; + } + + /** + * Test basic characteristic set computation + * + * @throws IOException + */ + @Test + public void characteristic_set_generating_reducer_01() throws IOException { + MapReduceDriver<LongWritable, T, NodeWritable, T, CharacteristicSetWritable, NullWritable> driver = this + .getMapReduceDriver(); + T tuple = this.createTuple(1, "http://predicate"); + driver.addInput(new LongWritable(1), tuple); + + this.createSet(driver, 1, "http://predicate"); + + driver.runTest(false); + } + + /** + * Test basic characteristic set computation + * + * @throws IOException + */ + @Test + public void characteristic_set_generating_reducer_02() throws IOException { + MapReduceDriver<LongWritable, T, NodeWritable, T, CharacteristicSetWritable, NullWritable> driver = this + .getMapReduceDriver(); + T tuple = this.createTuple(1, "http://predicate"); + driver.addInput(new LongWritable(1), tuple); + driver.addInput(new LongWritable(1), tuple); + + this.createSet(driver, 1, "http://predicate"); + + driver.runTest(false); + } + + /** + * Test basic characteristic set computation + * + * @throws IOException + */ + @Test + public void characteristic_set_generating_reducer_03() throws IOException { + MapReduceDriver<LongWritable, T, NodeWritable, T, CharacteristicSetWritable, NullWritable> driver = this + .getMapReduceDriver(); + T tuple = this.createTuple(1, "http://predicate"); + driver.addInput(new LongWritable(1), tuple); + tuple = this.createTuple(2, 
"http://predicate"); + driver.addInput(new LongWritable(2), tuple); + + this.createSet(driver, 2, "http://predicate"); + + driver.runTest(false); + } + + /** + * Test basic characteristic set computation + * + * @throws IOException + */ + @Test + public void characteristic_set_generating_reducer_04() throws IOException { + MapReduceDriver<LongWritable, T, NodeWritable, T, CharacteristicSetWritable, NullWritable> driver = this + .getMapReduceDriver(); + T tuple = this.createTuple(1, "http://predicate"); + driver.addInput(new LongWritable(1), tuple); + tuple = this.createTuple(1, "http://other"); + driver.addInput(new LongWritable(1), tuple); + + // Single entry sets + this.createSet(driver, 1, "http://predicate"); + this.createSet(driver, 1, "http://other"); + + // Two entry sets + this.createSet(driver, 1, "http://predicate", "http://other"); + + driver.runTest(false); + } + + /** + * Test basic characteristic set computation + * + * @throws IOException + */ + @Test + public void characteristic_set_generating_reducer_05() throws IOException { + MapReduceDriver<LongWritable, T, NodeWritable, T, CharacteristicSetWritable, NullWritable> driver = this + .getMapReduceDriver(); + T tuple = this.createTuple(1, "http://predicate"); + driver.addInput(new LongWritable(1), tuple); + tuple = this.createTuple(1, "http://other"); + driver.addInput(new LongWritable(2), tuple); + tuple = this.createTuple(1, "http://third"); + driver.addInput(new LongWritable(3), tuple); + + // Single entry sets + this.createSet(driver, 1, "http://predicate"); + this.createSet(driver, 1, "http://other"); + this.createSet(driver, 1, "http://third"); + + // Two entry sets + this.createSet(driver, 1, "http://predicate", "http://other"); + this.createSet(driver, 1, "http://predicate", "http://third"); + this.createSet(driver, 1, "http://other", "http://third"); + + // Three entry sets + this.createSet(driver, 1, "http://predicate", "http://other", "http://third"); + + driver.runTest(false); + } +} 
http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducerTest.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducerTest.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducerTest.java new file mode 100644 index 0000000..30da730 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducerTest.java @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce.characteristics; + +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver; +import org.apache.hadoop.mrunit.types.Pair; +import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapReduceTests; +import org.apache.jena.hadoop.rdf.mapreduce.characteristics.CharacteristicSetReducer; +import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable; +import org.apache.jena.hadoop.rdf.types.CharacteristicWritable; +import org.junit.Assert; +import org.junit.Test; + +import com.hp.hpl.jena.graph.NodeFactory; + +/** + * Abstract tests for the {@link CharacteristicSetReducer} + * + * + */ +public class CharacteristicSetReducerTest + extends + AbstractMapReduceTests<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> { + + @Override + protected final Mapper<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable> getMapperInstance() { + // Identity mapper + return new Mapper<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable>(); + } + + @Override + protected final Reducer<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> getReducerInstance() { + return new CharacteristicSetReducer(); + } + + /** + * Creates a set consisting of the given predicates + * + * @param predicates + * Predicates + * @return Set + */ + protected CharacteristicSetWritable createSet( + MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver, + int inputOccurrences, int outputOccurrences, String... 
predicates) { + CharacteristicSetWritable set = new CharacteristicSetWritable(); + for (String predicateUri : predicates) { + set.add(new CharacteristicWritable(NodeFactory.createURI(predicateUri))); + } + for (int i = 1; i <= inputOccurrences; i++) { + driver.addInput(set, set); + } + for (int i = 1; i <= outputOccurrences; i++) { + driver.addOutput(set, NullWritable.get()); + } + return set; + } + + /** + * Test characteristic set reduction + * + * @throws IOException + */ + @Test + public void characteristic_set_reducer_01() throws IOException { + MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver = this + .getMapReduceDriver(); + + this.createSet(driver, 1, 1, "http://predicate"); + + driver.runTest(false); + } + + /** + * Test characteristic set reduction + * + * @throws IOException + */ + @Test + public void characteristic_set_reducer_02() throws IOException { + MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver = this + .getMapReduceDriver(); + + this.createSet(driver, 2, 1, "http://predicate"); + + driver.runTest(false); + + List<Pair<CharacteristicSetWritable, NullWritable>> results = driver.run(); + CharacteristicSetWritable cw = results.get(0).getFirst(); + Assert.assertEquals(2, cw.getCount().get()); + } + + /** + * Test characteristic set reduction + * + * @throws IOException + */ + @Test + public void characteristic_set_reducer_03() throws IOException { + MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver = this + .getMapReduceDriver(); + + this.createSet(driver, 1, 1, "http://predicate"); + this.createSet(driver, 1, 1, "http://other"); + + driver.runTest(false); + } + + /** + * Test 
characteristic set reduction + * + * @throws IOException + */ + @Test + public void characteristic_set_reducer_04() throws IOException { + MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver = this + .getMapReduceDriver(); + + this.createSet(driver, 2, 1, "http://predicate"); + this.createSet(driver, 1, 1, "http://other"); + + driver.runTest(false); + + List<Pair<CharacteristicSetWritable, NullWritable>> results = driver.run(); + for (Pair<CharacteristicSetWritable, NullWritable> pair : results) { + CharacteristicSetWritable cw = pair.getFirst(); + boolean expectTwo = cw.getCharacteristics().next().getNode().get().hasURI("http://predicate"); + Assert.assertEquals(expectTwo ? 2 : 1, cw.getCount().get()); + } + } + + /** + * Test characteristic set reduction + * + * @throws IOException + */ + @Test + public void characteristic_set_reducer_05() throws IOException { + MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver = this + .getMapReduceDriver(); + + this.createSet(driver, 1, 1, "http://predicate", "http://other"); + this.createSet(driver, 1, 1, "http://other"); + + driver.runTest(false); + } + + /** + * Test characteristic set reduction + * + * @throws IOException + */ + @Test + public void characteristic_set_reducer_06() throws IOException { + MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver = this + .getMapReduceDriver(); + + this.createSet(driver, 2, 1, "http://predicate", "http://other"); + this.createSet(driver, 1, 1, "http://other"); + + driver.runTest(false); + + List<Pair<CharacteristicSetWritable, NullWritable>> results = driver.run(); + for (Pair<CharacteristicSetWritable, NullWritable> pair : 
results) { + CharacteristicSetWritable cw = pair.getFirst(); + boolean expectTwo = cw.hasCharacteristic("http://predicate"); + Assert.assertEquals(expectTwo ? 2 : 1, cw.getCount().get()); + } + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducerTest.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducerTest.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducerTest.java new file mode 100644 index 0000000..e647b68 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducerTest.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce.characteristics; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.jena.hadoop.rdf.mapreduce.characteristics.TripleCharacteristicSetGeneratingReducer; +import org.apache.jena.hadoop.rdf.mapreduce.group.TripleGroupBySubjectMapper; +import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable; +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; + +import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; +import com.hp.hpl.jena.graph.NodeFactory; +import com.hp.hpl.jena.graph.Triple; + +/** + * Tests for the {@link TripleCharacteristicSetGeneratingReducer} + * + * + * + */ +public class TripleCharacteristicSetGeneratingReducerTest extends AbstractCharacteristicSetGeneratingReducerTests<Triple, TripleWritable> { + + @Override + protected Mapper<LongWritable, TripleWritable, NodeWritable, TripleWritable> getMapperInstance() { + return new TripleGroupBySubjectMapper<LongWritable>(); + } + + @Override + protected Reducer<NodeWritable, TripleWritable, CharacteristicSetWritable, NullWritable> getReducerInstance() { + return new TripleCharacteristicSetGeneratingReducer(); + } + + @Override + protected TripleWritable createTuple(int i, String predicateUri) { + return new TripleWritable(new Triple(NodeFactory.createURI("http://subjects/" + i), NodeFactory.createURI(predicateUri), + NodeFactory.createLiteral(Integer.toString(i), XSDDatatype.XSDinteger))); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountReducedTests.java ---------------------------------------------------------------------- diff --git 
a/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountReducedTests.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountReducedTests.java new file mode 100644 index 0000000..ebdbcde --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountReducedTests.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
package org.apache.jena.hadoop.rdf.mapreduce.count;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapReduceTests;
import org.apache.jena.hadoop.rdf.mapreduce.count.AbstractNodeTupleNodeCountMapper;
import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
import org.apache.jena.hadoop.rdf.types.NodeWritable;
import org.junit.Test;

/**
 * Abstract map/reduce tests for node counting using mappers derived from
 * {@link AbstractNodeTupleNodeCountMapper}.
 * <p>
 * Concrete tests supply the tuples via {@link #createValue(int)} and say how a
 * tuple breaks down into nodes via {@link #getNodes(AbstractNodeTupleWritable)};
 * this class derives the expected per-node totals from those two hooks.
 * </p>
 *
 * @param <TValue>
 *            Tuple type
 * @param <T>
 *            Writable tuple type
 */
public abstract class AbstractNodeTupleNodeCountReducedTests<TValue, T extends AbstractNodeTupleWritable<TValue>> extends
        AbstractMapReduceTests<LongWritable, T, NodeWritable, LongWritable, NodeWritable, LongWritable> {

    /**
     * Feeds {@code num} generated tuples into the driver and registers, for
     * every distinct node seen in those tuples, the number of times it
     * occurred as the expected output.
     *
     * @param driver
     *            Driver to add inputs and expected outputs to
     * @param num
     *            Number of tuples to generate
     */
    protected void generateData(MapReduceDriver<LongWritable, T, NodeWritable, LongWritable, NodeWritable, LongWritable> driver, int num) {
        Map<NodeWritable, Long> expectedTotals = new HashMap<NodeWritable, Long>();

        for (int index = 0; index < num; index++) {
            LongWritable inputKey = new LongWritable(index);
            T tuple = this.createValue(index);
            NodeWritable[] tupleNodes = this.getNodes(tuple);

            driver.addInput(inputKey, tuple);

            // Tally one occurrence for each node of this tuple
            for (NodeWritable node : tupleNodes) {
                Long soFar = expectedTotals.get(node);
                expectedTotals.put(node, soFar == null ? 1L : soFar + 1);
            }
        }

        // Every distinct node is expected exactly once, paired with its total
        for (Entry<NodeWritable, Long> total : expectedTotals.entrySet()) {
            driver.addOutput(total.getKey(), new LongWritable(total.getValue()));
        }
    }

    /**
     * Creates the tuple value for the given index
     *
     * @param i
     *            Index
     * @return Tuple value
     */
    protected abstract T createValue(int i);

    /**
     * Breaks a tuple value down into the nodes it is made of
     *
     * @param tuple
     *            Tuple value
     * @return Nodes
     */
    protected abstract NodeWritable[] getNodes(T tuple);

    /**
     * Runs a node count test over the given number of generated tuples
     *
     * @param num
     *            Number of tuples to generate
     * @throws IOException
     *             May be thrown while the driver runs the test
     */
    protected void testNodeCount(int num) throws IOException {
        MapReduceDriver<LongWritable, T, NodeWritable, LongWritable, NodeWritable, LongWritable> mrDriver = this.getMapReduceDriver();
        this.generateData(mrDriver, num);
        // NOTE(review): the false flag presumably disables output-order
        // checking, which is needed because the expected outputs are added in
        // HashMap iteration order - confirm against the MRUnit documentation
        mrDriver.runTest(false);
    }

    /**
     * Counts the nodes of a single tuple
     *
     * @throws IOException
     *             May be thrown while the driver runs the test
     */
    @Test
    public void node_count_01() throws IOException {
        testNodeCount(1);
    }

    /**
     * Counts the nodes of 100 tuples
     *
     * @throws IOException
     *             May be thrown while the driver runs the test
     */
    @Test
    public void node_count_02() throws IOException {
        testNodeCount(100);
    }

    /**
     * Counts the nodes of 1,000 tuples
     *
     * @throws IOException
     *             May be thrown while the driver runs the test
     */
    @Test
    public void node_count_03() throws IOException {
        testNodeCount(1000);
    }

    /**
     * Counts the nodes of 2,500 tuples
     *
     * @throws IOException
     *             May be thrown while the driver runs the test
     */
    @Test
    public void node_count_04() throws IOException {
        testNodeCount(2500);
    }
}
http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountTests.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountTests.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountTests.java new file mode 100644 index 0000000..e589b3a --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountTests.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */
package org.apache.jena.hadoop.rdf.mapreduce.count;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapperTests;
import org.apache.jena.hadoop.rdf.mapreduce.count.AbstractNodeTupleNodeCountMapper;
import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
import org.apache.jena.hadoop.rdf.types.NodeWritable;
import org.junit.Test;

/**
 * Abstract mapper-only tests for mappers derived from
 * {@link AbstractNodeTupleNodeCountMapper}.
 * <p>
 * For every generated tuple the tests expect one output per node of that
 * tuple, each paired with a count of 1.
 * </p>
 *
 * @param <TValue>
 *            Tuple type
 * @param <T>
 *            Writable tuple type
 */
public abstract class AbstractNodeTupleNodeCountTests<TValue, T extends AbstractNodeTupleWritable<TValue>> extends
        AbstractMapperTests<LongWritable, T, NodeWritable, LongWritable> {

    /**
     * Feeds {@code num} generated tuples into the driver and, for each node of
     * each tuple, registers an expected output of that node with a count of 1
     *
     * @param driver
     *            Driver to add inputs and expected outputs to
     * @param num
     *            Number of tuples to generate
     */
    protected void generateData(MapDriver<LongWritable, T, NodeWritable, LongWritable> driver, int num) {
        // Same count instance is passed for every expected output
        LongWritable one = new LongWritable(1);

        for (int index = 0; index < num; index++) {
            LongWritable inputKey = new LongWritable(index);
            T tuple = this.createValue(index);
            NodeWritable[] tupleNodes = this.getNodes(tuple);

            driver.addInput(inputKey, tuple);

            // Expected outputs are registered in the order getNodes() returns them
            for (int j = 0; j < tupleNodes.length; j++) {
                driver.addOutput(tupleNodes[j], one);
            }
        }
    }

    /**
     * Creates the tuple value for the given index
     *
     * @param i
     *            Index
     * @return Tuple value
     */
    protected abstract T createValue(int i);

    /**
     * Breaks a tuple value down into the nodes it is made of
     *
     * @param tuple
     *            Tuple value
     * @return Nodes
     */
    protected abstract NodeWritable[] getNodes(T tuple);

    /**
     * Runs a node count test over the given number of generated tuples
     *
     * @param num
     *            Number of tuples to generate
     * @throws IOException
     *             May be thrown while the driver runs the test
     */
    protected void testNodeCount(int num) throws IOException {
        MapDriver<LongWritable, T, NodeWritable, LongWritable> mapDriver = this.getMapDriver();
        this.generateData(mapDriver, num);
        mapDriver.runTest();
    }

    /**
     * Counts the nodes of a single tuple
     *
     * @throws IOException
     *             May be thrown while the driver runs the test
     */
    @Test
    public void node_count_01() throws IOException {
        testNodeCount(1);
    }

    /**
     * Counts the nodes of 100 tuples
     *
     * @throws IOException
     *             May be thrown while the driver runs the test
     */
    @Test
    public void node_count_02() throws IOException {
        testNodeCount(100);
    }

    /**
     * Counts the nodes of 1,000 tuples
     *
     * @throws IOException
     *             May be thrown while the driver runs the test
     */
    @Test
    public void node_count_03() throws IOException {
        testNodeCount(1000);
    }

    /**
     * Counts the nodes of 2,500 tuples
     *
     * @throws IOException
     *             May be thrown while the driver runs the test
     */
    @Test
    public void node_count_04() throws IOException {
        testNodeCount(2500);
    }
}
http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/QuadNodeCountMapReduceTest.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/QuadNodeCountMapReduceTest.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/QuadNodeCountMapReduceTest.java new file mode 100644 index 0000000..b453bee --- /dev/null +++ 
b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/QuadNodeCountMapReduceTest.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */
package org.apache.jena.hadoop.rdf.mapreduce.count;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.jena.hadoop.rdf.mapreduce.count.NodeCountReducer;
import org.apache.jena.hadoop.rdf.mapreduce.count.QuadNodeCountMapper;
import org.apache.jena.hadoop.rdf.mapreduce.count.TripleNodeCountMapper;
import org.apache.jena.hadoop.rdf.types.NodeWritable;
import org.apache.jena.hadoop.rdf.types.QuadWritable;

import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
import com.hp.hpl.jena.graph.NodeFactory;
import com.hp.hpl.jena.graph.Triple;
import com.hp.hpl.jena.sparql.core.Quad;

/**
 * Tests for the {@link QuadNodeCountMapper} used in conjunction with the
 * {@link NodeCountReducer}
 */
public class QuadNodeCountMapReduceTest extends AbstractNodeTupleNodeCountReducedTests<Quad, QuadWritable> {

    /**
     * Supplies the mapper under test
     *
     * @return A new {@link QuadNodeCountMapper}
     */
    @Override
    protected Mapper<LongWritable, QuadWritable, NodeWritable, LongWritable> getMapperInstance() {
        return new QuadNodeCountMapper<LongWritable>();
    }

    /**
     * Supplies the reducer under test
     *
     * @return A new {@link NodeCountReducer}
     */
    @Override
    protected Reducer<NodeWritable, LongWritable, NodeWritable, LongWritable> getReducerInstance() {
        return new NodeCountReducer();
    }

    /**
     * Creates a quad in the generated default graph whose subject URI and
     * integer literal object both vary with the index, while the predicate is
     * the same for every index
     *
     * @param i
     *            Index
     * @return Writable quad
     */
    @Override
    protected QuadWritable createValue(int i) {
        return new QuadWritable(new Quad(Quad.defaultGraphNodeGenerated, new Triple(
                NodeFactory.createURI("http://subjects/" + i), NodeFactory.createURI("http://predicate"),
                NodeFactory.createLiteral(Integer.toString(i), XSDDatatype.XSDinteger))));
    }

    /**
     * Splits a quad into its graph, subject, predicate and object nodes, in
     * that order
     *
     * @param tuple
     *            Writable quad
     * @return Nodes
     */
    @Override
    protected NodeWritable[] getNodes(QuadWritable tuple) {
        Quad q = tuple.get();
        return new NodeWritable[] { new NodeWritable(q.getGraph()), new NodeWritable(q.getSubject()),
                new NodeWritable(q.getPredicate()), new NodeWritable(q.getObject()) };
    }
}
http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/QuadNodeCountMapperTest.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/QuadNodeCountMapperTest.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/QuadNodeCountMapperTest.java new file mode 100644 index 0000000..869fc06 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/QuadNodeCountMapperTest.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */
package org.apache.jena.hadoop.rdf.mapreduce.count;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.jena.hadoop.rdf.mapreduce.count.QuadNodeCountMapper;
import org.apache.jena.hadoop.rdf.types.NodeWritable;
import org.apache.jena.hadoop.rdf.types.QuadWritable;

import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
import com.hp.hpl.jena.graph.NodeFactory;
import com.hp.hpl.jena.graph.Triple;
import com.hp.hpl.jena.sparql.core.Quad;

/**
 * Mapper-only tests for the {@link QuadNodeCountMapper}
 */
public class QuadNodeCountMapperTest extends AbstractNodeTupleNodeCountTests<Quad, QuadWritable> {

    /**
     * Supplies the mapper under test
     *
     * @return A new {@link QuadNodeCountMapper}
     */
    @Override
    protected Mapper<LongWritable, QuadWritable, NodeWritable, LongWritable> getInstance() {
        return new QuadNodeCountMapper<LongWritable>();
    }

    /**
     * Creates a quad in the generated default graph whose subject URI and
     * integer literal object both vary with the index, while the predicate is
     * the same for every index
     *
     * @param i
     *            Index
     * @return Writable quad
     */
    @Override
    protected QuadWritable createValue(int i) {
        Triple triple = new Triple(
                NodeFactory.createURI("http://subjects/" + i),
                NodeFactory.createURI("http://predicate"),
                NodeFactory.createLiteral(Integer.toString(i), XSDDatatype.XSDinteger));
        Quad quad = new Quad(Quad.defaultGraphNodeGenerated, triple);
        return new QuadWritable(quad);
    }

    /**
     * Splits a quad into its graph, subject, predicate and object nodes, in
     * that order
     *
     * @param tuple
     *            Writable quad
     * @return Nodes
     */
    @Override
    protected NodeWritable[] getNodes(QuadWritable tuple) {
        Quad quad = tuple.get();
        NodeWritable[] nodes = new NodeWritable[4];
        nodes[0] = new NodeWritable(quad.getGraph());
        nodes[1] = new NodeWritable(quad.getSubject());
        nodes[2] = new NodeWritable(quad.getPredicate());
        nodes[3] = new NodeWritable(quad.getObject());
        return nodes;
    }

}
http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/TripleNodeCountMapReduceTest.java ---------------------------------------------------------------------- diff --git a/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/TripleNodeCountMapReduceTest.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/TripleNodeCountMapReduceTest.java new file mode 100644 index 0000000..660cfe7 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/TripleNodeCountMapReduceTest.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
package org.apache.jena.hadoop.rdf.mapreduce.count;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.jena.hadoop.rdf.mapreduce.count.NodeCountReducer;
import org.apache.jena.hadoop.rdf.mapreduce.count.TripleNodeCountMapper;
import org.apache.jena.hadoop.rdf.types.NodeWritable;
import org.apache.jena.hadoop.rdf.types.TripleWritable;

import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
import com.hp.hpl.jena.graph.NodeFactory;
import com.hp.hpl.jena.graph.Triple;

/**
 * Map/reduce tests for the {@link TripleNodeCountMapper} used in conjunction
 * with the {@link NodeCountReducer}
 */
public class TripleNodeCountMapReduceTest extends AbstractNodeTupleNodeCountReducedTests<Triple, TripleWritable> {

    /**
     * Supplies the mapper under test
     *
     * @return A new {@link TripleNodeCountMapper}
     */
    @Override
    protected Mapper<LongWritable, TripleWritable, NodeWritable, LongWritable> getMapperInstance() {
        return new TripleNodeCountMapper<LongWritable>();
    }

    /**
     * Supplies the reducer under test
     *
     * @return A new {@link NodeCountReducer}
     */
    @Override
    protected Reducer<NodeWritable, LongWritable, NodeWritable, LongWritable> getReducerInstance() {
        return new NodeCountReducer();
    }

    /**
     * Creates a triple whose subject URI and integer literal object both vary
     * with the index, while the predicate is the same for every index
     *
     * @param i
     *            Index
     * @return Writable triple
     */
    @Override
    protected TripleWritable createValue(int i) {
        Triple triple = new Triple(
                NodeFactory.createURI("http://subjects/" + i),
                NodeFactory.createURI("http://predicate"),
                NodeFactory.createLiteral(Integer.toString(i), XSDDatatype.XSDinteger));
        return new TripleWritable(triple);
    }

    /**
     * Splits a triple into its subject, predicate and object nodes, in that
     * order
     *
     * @param tuple
     *            Writable triple
     * @return Nodes
     */
    @Override
    protected NodeWritable[] getNodes(TripleWritable tuple) {
        Triple triple = tuple.get();
        NodeWritable subject = new NodeWritable(triple.getSubject());
        NodeWritable predicate = new NodeWritable(triple.getPredicate());
        NodeWritable object = new NodeWritable(triple.getObject());
        return new NodeWritable[] { subject, predicate, object };
    }
}
http://git-wip-us.apache.org/repos/asf/jena/blob/05c389be/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/TripleNodeCountMapperTest.java ---------------------------------------------------------------------- diff --git 
a/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/TripleNodeCountMapperTest.java b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/TripleNodeCountMapperTest.java new file mode 100644 index 0000000..fdac378 --- /dev/null +++ b/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/TripleNodeCountMapperTest.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
package org.apache.jena.hadoop.rdf.mapreduce.count;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.jena.hadoop.rdf.mapreduce.count.TripleNodeCountMapper;
import org.apache.jena.hadoop.rdf.types.NodeWritable;
import org.apache.jena.hadoop.rdf.types.TripleWritable;

import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
import com.hp.hpl.jena.graph.NodeFactory;
import com.hp.hpl.jena.graph.Triple;

/**
 * Mapper-only tests for the {@link TripleNodeCountMapper}
 */
public class TripleNodeCountMapperTest extends AbstractNodeTupleNodeCountTests<Triple, TripleWritable> {

    /**
     * Supplies the mapper under test
     *
     * @return A new {@link TripleNodeCountMapper}
     */
    @Override
    protected Mapper<LongWritable, TripleWritable, NodeWritable, LongWritable> getInstance() {
        return new TripleNodeCountMapper<LongWritable>();
    }

    /**
     * Creates a triple whose subject URI and integer literal object both vary
     * with the index, while the predicate is the same for every index
     *
     * @param i
     *            Index
     * @return Writable triple
     */
    @Override
    protected TripleWritable createValue(int i) {
        Triple triple = new Triple(
                NodeFactory.createURI("http://subjects/" + i),
                NodeFactory.createURI("http://predicate"),
                NodeFactory.createLiteral(Integer.toString(i), XSDDatatype.XSDinteger));
        return new TripleWritable(triple);
    }

    /**
     * Splits a triple into its subject, predicate and object nodes, in that
     * order
     *
     * @param tuple
     *            Writable triple
     * @return Nodes
     */
    @Override
    protected NodeWritable[] getNodes(TripleWritable tuple) {
        Triple triple = tuple.get();
        NodeWritable[] nodes = new NodeWritable[3];
        nodes[0] = new NodeWritable(triple.getSubject());
        nodes[1] = new NodeWritable(triple.getPredicate());
        nodes[2] = new NodeWritable(triple.getObject());
        return nodes;
    }

}